Bug Summary

File: include/llvm/ADT/SmallBitVector.h
Warning: line 125, column 3
Potential memory leak

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86 -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn345461/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu 
-internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/lib/Target/X86 -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-10-27-211344-32123-1 -x c++ /build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp -faddrsig

/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86ISelLowering.h"
16#include "Utils/X86ShuffleDecode.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86ShuffleDecodeConstantPool.h"
23#include "X86TargetMachine.h"
24#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallBitVector.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/CodeGen/IntrinsicLowering.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineJumpTableInfo.h"
36#include "llvm/CodeGen/MachineModuleInfo.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/WinEHFuncInfo.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/DiagnosticInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalAlias.h"
47#include "llvm/IR/GlobalVariable.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Intrinsics.h"
50#include "llvm/MC/MCAsmInfo.h"
51#include "llvm/MC/MCContext.h"
52#include "llvm/MC/MCExpr.h"
53#include "llvm/MC/MCSymbol.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/MathExtras.h"
59#include "llvm/Target/TargetOptions.h"
60#include <algorithm>
61#include <bitset>
62#include <cctype>
63#include <numeric>
64using namespace llvm;
65
66#define DEBUG_TYPE "x86-isel"
67
68STATISTIC(NumTailCalls, "Number of tail calls"); // expands to: static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls", "Number of tail calls", {0}, {false}};
69
70static cl::opt<bool> ExperimentalVectorWideningLegalization(
71 "x86-experimental-vector-widening-legalization", cl::init(false),
72 cl::desc("Enable an experimental vector type legalization through widening "
73 "rather than promotion."),
74 cl::Hidden); // Hidden boolean option; initial value is false (cl::init(false)).
75
76static cl::opt<int> ExperimentalPrefLoopAlignment(
77 "x86-experimental-pref-loop-alignment", cl::init(4),
78 cl::desc("Sets the preferable loop alignment for experiments "
79 "(the last x86-experimental-pref-loop-alignment bits"
80 " of the loop header PC will be 0)."),
81 cl::Hidden); // Hidden integer option; initial value is 4. Per its description the value is a count of low PC bits, not a byte count.
82
83static cl::opt<bool> MulConstantOptimization(
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden); // Hidden boolean option; initial value is true (cl::init(true)).
88
89/// Call this when the user attempts to do something unsupported, like
90/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
91/// report_fatal_error, so calling code should attempt to recover without
92/// crashing. \p DAG supplies the machine function and context, \p dl the debug location, and \p Msg the diagnostic text.
93static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
94 const char *Msg) {
95 MachineFunction &MF = DAG.getMachineFunction(); // Its getFunction() result is the function named in the diagnostic.
96 DAG.getContext()->diagnose( // Hand the diagnostic to the DAG's context; this helper returns normally afterwards.
97 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
98}
99
100X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
101 const X86Subtarget &STI)
102 : TargetLowering(TM), Subtarget(STI) {
103 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
104 X86ScalarSSEf64 = Subtarget.hasSSE2();
105 X86ScalarSSEf32 = Subtarget.hasSSE1();
106 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
107
108 // Set up the TargetLowering object.
109
110 // X86 is weird. It always uses i8 for shift amounts and setcc results.
111 setBooleanContents(ZeroOrOneBooleanContent);
112 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
113 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
114
115 // For 64-bit, since we have so many registers, use the ILP scheduler.
116 // For 32-bit, use the register pressure specific scheduling.
117 // For Atom, always use ILP scheduling.
118 if (Subtarget.isAtom())
119 setSchedulingPreference(Sched::ILP);
120 else if (Subtarget.is64Bit())
121 setSchedulingPreference(Sched::ILP);
122 else
123 setSchedulingPreference(Sched::RegPressure);
124 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
125 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
126
127 // Bypass expensive divides and use cheaper ones.
128 if (TM.getOptLevel() >= CodeGenOpt::Default) {
129 if (Subtarget.hasSlowDivide32())
130 addBypassSlowDiv(32, 8);
131 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
132 addBypassSlowDiv(64, 32);
133 }
134
135 if (Subtarget.isTargetKnownWindowsMSVC() ||
136 Subtarget.isTargetWindowsItanium()) {
137 // Setup Windows compiler runtime calls.
138 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
139 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
140 setLibcallName(RTLIB::SREM_I64, "_allrem");
141 setLibcallName(RTLIB::UREM_I64, "_aullrem");
142 setLibcallName(RTLIB::MUL_I64, "_allmul");
143 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
144 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
145 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
146 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
147 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
148 }
149
150 if (Subtarget.isTargetDarwin()) {
151 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
152 setUseUnderscoreSetJmp(false);
153 setUseUnderscoreLongJmp(false);
154 } else if (Subtarget.isTargetWindowsGNU()) {
155 // MS runtime is weird: it exports _setjmp, but longjmp!
156 setUseUnderscoreSetJmp(true);
157 setUseUnderscoreLongJmp(false);
158 } else {
159 setUseUnderscoreSetJmp(true);
160 setUseUnderscoreLongJmp(true);
161 }
162
163 // Set up the register classes.
164 addRegisterClass(MVT::i8, &X86::GR8RegClass);
165 addRegisterClass(MVT::i16, &X86::GR16RegClass);
166 addRegisterClass(MVT::i32, &X86::GR32RegClass);
167 if (Subtarget.is64Bit())
168 addRegisterClass(MVT::i64, &X86::GR64RegClass);
169
170 for (MVT VT : MVT::integer_valuetypes())
171 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
172
173 // We don't accept any truncstore of integer registers.
174 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
175 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
176 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
177 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
178 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
179 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
180
181 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
182
183 // SETOEQ and SETUNE require checking two conditions.
184 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
185 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
186 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
187 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
188 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
189 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
190
191 // Integer absolute.
192 if (Subtarget.hasCMov()) {
193 setOperationAction(ISD::ABS , MVT::i16 , Custom);
194 setOperationAction(ISD::ABS , MVT::i32 , Custom);
195 if (Subtarget.is64Bit())
196 setOperationAction(ISD::ABS , MVT::i64 , Custom);
197 }
198
199 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
200 // operation.
201 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
202 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
203 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
204
205 if (Subtarget.is64Bit()) {
206 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
207 // f32/f64 are legal, f80 is custom.
208 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
209 else
210 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
211 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
212 } else if (!Subtarget.useSoftFloat()) {
213 // We have an algorithm for SSE2->double, and we turn this into a
214 // 64-bit FILD followed by conditional FADD for other targets.
215 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
216 // We have an algorithm for SSE2, and we turn this into a 64-bit
217 // FILD or VCVTUSI2SS/SD for other targets.
218 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
219 } else {
220 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
221 }
222
223 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
224 // this operation.
225 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
226 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
227
228 if (!Subtarget.useSoftFloat()) {
229 // SSE has no i16 to fp conversion, only i32.
230 if (X86ScalarSSEf32) {
231 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
232 // f32 and f64 cases are Legal, f80 case is not
233 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
234 } else {
235 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
236 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
237 }
238 } else {
239 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
240 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand);
241 }
242
243 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
244 // this operation.
245 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
246 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
247
248 if (!Subtarget.useSoftFloat()) {
249 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
250 // are Legal, f80 is custom lowered.
251 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
252 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
253
254 if (X86ScalarSSEf32) {
255 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
256 // f32 and f64 cases are Legal, f80 case is not
257 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
258 } else {
259 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
260 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
261 }
262 } else {
263 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
264 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
265 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
266 }
267
268 // Handle FP_TO_UINT by promoting the destination to a larger signed
269 // conversion.
270 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
271 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
272 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
273
274 if (Subtarget.is64Bit()) {
275 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
276 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
277 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
278 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
279 } else {
280 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
281 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
282 }
283 } else if (!Subtarget.useSoftFloat()) {
284 // Since AVX is a superset of SSE3, only check for SSE here.
285 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
286 // Expand FP_TO_UINT into a select.
287 // FIXME: We would like to use a Custom expander here eventually to do
288 // the optimal thing for SSE vs. the default expansion in the legalizer.
289 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
290 else
291 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
292 // With SSE3 we can use fisttpll to convert to a signed i64; without
293 // SSE, we're stuck with a fistpll.
294 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
295
296 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
297 }
298
299 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
300 if (!X86ScalarSSEf64) {
301 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
302 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
303 if (Subtarget.is64Bit()) {
304 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
305 // Without SSE, i64->f64 goes through memory.
306 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
307 }
308 } else if (!Subtarget.is64Bit())
309 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
310
311 // Scalar integer divide and remainder are lowered to use operations that
312 // produce two results, to match the available instructions. This exposes
313 // the two-result form to trivial CSE, which is able to combine x/y and x%y
314 // into a single instruction.
315 //
316 // Scalar integer multiply-high is also lowered to use two-result
317 // operations, to match the available instructions. However, plain multiply
318 // (low) operations are left as Legal, as there are single-result
319 // instructions for this in x86. Using the two-result multiply instructions
320 // when both high and low results are needed must be arranged by dagcombine.
321 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
322 setOperationAction(ISD::MULHS, VT, Expand);
323 setOperationAction(ISD::MULHU, VT, Expand);
324 setOperationAction(ISD::SDIV, VT, Expand);
325 setOperationAction(ISD::UDIV, VT, Expand);
326 setOperationAction(ISD::SREM, VT, Expand);
327 setOperationAction(ISD::UREM, VT, Expand);
328 }
329
330 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
331 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
332 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
333 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
334 setOperationAction(ISD::BR_CC, VT, Expand);
335 setOperationAction(ISD::SELECT_CC, VT, Expand);
336 }
337 if (Subtarget.is64Bit())
338 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
339 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
340 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
341 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
342 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
343
344 setOperationAction(ISD::FREM , MVT::f32 , Expand);
345 setOperationAction(ISD::FREM , MVT::f64 , Expand);
346 setOperationAction(ISD::FREM , MVT::f80 , Expand);
347 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
348
349 // Promote the i8 variants and force them on up to i32 which has a shorter
350 // encoding.
351 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
352 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
353 if (!Subtarget.hasBMI()) {
354 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
355 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
356 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
357 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
358 if (Subtarget.is64Bit()) {
359 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
360 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
361 }
362 }
363
364 if (Subtarget.hasLZCNT()) {
365 // When promoting the i8 variants, force them to i32 for a shorter
366 // encoding.
367 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
368 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
369 } else {
370 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
371 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
372 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
373 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
374 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
375 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
376 if (Subtarget.is64Bit()) {
377 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
378 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
379 }
380 }
381
382 // Special handling for half-precision floating point conversions.
383 // If we don't have F16C support, then lower half float conversions
384 // into library calls.
385 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
386 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
387 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
388 }
389
390 // There's never any support for operations beyond MVT::f32.
391 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
392 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
393 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
394 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
395
396 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
397 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
398 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
399 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
400 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
401 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
402
403 if (Subtarget.hasPOPCNT()) {
404 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
405 } else {
406 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
407 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
408 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
409 if (Subtarget.is64Bit())
410 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
411 }
412
413 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
414
415 if (!Subtarget.hasMOVBE())
416 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
417
418 // These should be promoted to a larger select which is supported.
419 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
420 // X86 wants to expand cmov itself.
421 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
422 setOperationAction(ISD::SELECT, VT, Custom);
423 setOperationAction(ISD::SETCC, VT, Custom);
424 }
425 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
426 if (VT == MVT::i64 && !Subtarget.is64Bit())
427 continue;
428 setOperationAction(ISD::SELECT, VT, Custom);
429 setOperationAction(ISD::SETCC, VT, Custom);
430 }
431
432 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
433 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
434 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
435
436 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
437 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
438 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
439 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
440 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
441 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
442 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
443 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
444
445 // Darwin ABI issue.
446 for (auto VT : { MVT::i32, MVT::i64 }) {
447 if (VT == MVT::i64 && !Subtarget.is64Bit())
448 continue;
449 setOperationAction(ISD::ConstantPool , VT, Custom);
450 setOperationAction(ISD::JumpTable , VT, Custom);
451 setOperationAction(ISD::GlobalAddress , VT, Custom);
452 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
453 setOperationAction(ISD::ExternalSymbol , VT, Custom);
454 setOperationAction(ISD::BlockAddress , VT, Custom);
455 }
456
457 // 64-bit shl, sra, srl (iff 32-bit x86)
458 for (auto VT : { MVT::i32, MVT::i64 }) {
459 if (VT == MVT::i64 && !Subtarget.is64Bit())
460 continue;
461 setOperationAction(ISD::SHL_PARTS, VT, Custom);
462 setOperationAction(ISD::SRA_PARTS, VT, Custom);
463 setOperationAction(ISD::SRL_PARTS, VT, Custom);
464 }
465
466 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
467 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
468
469 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
470
471 // Expand certain atomics
472 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
473 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
474 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
477 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
478 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
479 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
480 }
481
482 if (Subtarget.hasCmpxchg16b()) {
483 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
484 }
485
486 // FIXME - use subtarget debug flags
487 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
488 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
489 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
490 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
491 }
492
493 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
494 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
495
496 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
497 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
498
499 setOperationAction(ISD::TRAP, MVT::Other, Legal);
500 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
501
502 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
503 setOperationAction(ISD::VASTART , MVT::Other, Custom);
504 setOperationAction(ISD::VAEND , MVT::Other, Expand);
505 bool Is64Bit = Subtarget.is64Bit();
506 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
507 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
508
509 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
510 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
511
512 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
513
514 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
515 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
516 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
517
518 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
519 // f32 and f64 use SSE.
520 // Set up the FP register classes.
521 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
522 : &X86::FR32RegClass);
523 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
524 : &X86::FR64RegClass);
525
526 for (auto VT : { MVT::f32, MVT::f64 }) {
527 // Use ANDPD to simulate FABS.
528 setOperationAction(ISD::FABS, VT, Custom);
529
530 // Use XORP to simulate FNEG.
531 setOperationAction(ISD::FNEG, VT, Custom);
532
533 // Use ANDPD and ORPD to simulate FCOPYSIGN.
534 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
535
536 // We don't support sin/cos/fmod
537 setOperationAction(ISD::FSIN , VT, Expand);
538 setOperationAction(ISD::FCOS , VT, Expand);
539 setOperationAction(ISD::FSINCOS, VT, Expand);
540 }
541
542 // Lower this to MOVMSK plus an AND.
543 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
544 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
545
546 } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
547 // Use SSE for f32, x87 for f64.
548 // Set up the FP register classes.
549 addRegisterClass(MVT::f32, &X86::FR32RegClass);
550 if (UseX87)
551 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
552
553 // Use ANDPS to simulate FABS.
554 setOperationAction(ISD::FABS , MVT::f32, Custom);
555
556 // Use XORP to simulate FNEG.
557 setOperationAction(ISD::FNEG , MVT::f32, Custom);
558
559 if (UseX87)
560 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
561
562 // Use ANDPS and ORPS to simulate FCOPYSIGN.
563 if (UseX87)
564 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
565 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
566
567 // We don't support sin/cos/fmod
568 setOperationAction(ISD::FSIN , MVT::f32, Expand);
569 setOperationAction(ISD::FCOS , MVT::f32, Expand);
570 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
571
572 if (UseX87) {
573 // Always expand sin/cos functions even though x87 has an instruction.
574 setOperationAction(ISD::FSIN, MVT::f64, Expand);
575 setOperationAction(ISD::FCOS, MVT::f64, Expand);
576 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
577 }
578 } else if (UseX87) {
579 // f32 and f64 in x87.
580 // Set up the FP register classes.
581 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
582 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
583
584 for (auto VT : { MVT::f32, MVT::f64 }) {
585 setOperationAction(ISD::UNDEF, VT, Expand);
586 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
587
588 // Always expand sin/cos functions even though x87 has an instruction.
589 setOperationAction(ISD::FSIN , VT, Expand);
590 setOperationAction(ISD::FCOS , VT, Expand);
591 setOperationAction(ISD::FSINCOS, VT, Expand);
592 }
593 }
594
595 // Expand FP32 immediates into loads from the stack, save special cases.
596 if (isTypeLegal(MVT::f32)) {
597 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
598 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
599 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
600 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
601 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
602 } else // SSE immediates.
603 addLegalFPImmediate(APFloat(+0.0f)); // xorps
604 }
605 // Expand FP64 immediates into loads from the stack, save special cases.
606 if (isTypeLegal(MVT::f64)) {
607 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
608 addLegalFPImmediate(APFloat(+0.0)); // FLD0
609 addLegalFPImmediate(APFloat(+1.0)); // FLD1
610 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
611 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
612 } else // SSE immediates.
613 addLegalFPImmediate(APFloat(+0.0)); // xorpd
614 }
615
616 // We don't support FMA.
617 setOperationAction(ISD::FMA, MVT::f64, Expand);
618 setOperationAction(ISD::FMA, MVT::f32, Expand);
619
620 // Long double always uses X87, except f128 in MMX.
621 if (UseX87) {
622 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
623 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
624 : &X86::VR128RegClass);
625 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
626 setOperationAction(ISD::FABS , MVT::f128, Custom);
627 setOperationAction(ISD::FNEG , MVT::f128, Custom);
628 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
629 }
630
631 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
632 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
633 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
634 {
635 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
636 addLegalFPImmediate(TmpFlt); // FLD0
637 TmpFlt.changeSign();
638 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
639
640 bool ignored;
641 APFloat TmpFlt2(+1.0);
642 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
643 &ignored);
644 addLegalFPImmediate(TmpFlt2); // FLD1
645 TmpFlt2.changeSign();
646 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
647 }
648
649 // Always expand sin/cos functions even though x87 has an instruction.
650 setOperationAction(ISD::FSIN , MVT::f80, Expand);
651 setOperationAction(ISD::FCOS , MVT::f80, Expand);
652 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
653
654 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
655 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
656 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
657 setOperationAction(ISD::FRINT, MVT::f80, Expand);
658 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
659 setOperationAction(ISD::FMA, MVT::f80, Expand);
660 }
661
662 // Always use a library call for pow.
663 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
664 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
665 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
666
667 setOperationAction(ISD::FLOG, MVT::f80, Expand);
668 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
669 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
670 setOperationAction(ISD::FEXP, MVT::f80, Expand);
671 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
672 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
673 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
674
675 // Some FP actions are always expanded for vector types.
676 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
677 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
678 setOperationAction(ISD::FSIN, VT, Expand);
679 setOperationAction(ISD::FSINCOS, VT, Expand);
680 setOperationAction(ISD::FCOS, VT, Expand);
681 setOperationAction(ISD::FREM, VT, Expand);
682 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
683 setOperationAction(ISD::FPOW, VT, Expand);
684 setOperationAction(ISD::FLOG, VT, Expand);
685 setOperationAction(ISD::FLOG2, VT, Expand);
686 setOperationAction(ISD::FLOG10, VT, Expand);
687 setOperationAction(ISD::FEXP, VT, Expand);
688 setOperationAction(ISD::FEXP2, VT, Expand);
689 }
690
691 // First set operation action for all vector types to either promote
692 // (for widening) or expand (for scalarization). Then we will selectively
693 // turn on ones that can be effectively codegen'd.
694 for (MVT VT : MVT::vector_valuetypes()) {
695 setOperationAction(ISD::SDIV, VT, Expand);
696 setOperationAction(ISD::UDIV, VT, Expand);
697 setOperationAction(ISD::SREM, VT, Expand);
698 setOperationAction(ISD::UREM, VT, Expand);
699 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
700 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
701 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
702 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
703 setOperationAction(ISD::FMA, VT, Expand);
704 setOperationAction(ISD::FFLOOR, VT, Expand);
705 setOperationAction(ISD::FCEIL, VT, Expand);
706 setOperationAction(ISD::FTRUNC, VT, Expand);
707 setOperationAction(ISD::FRINT, VT, Expand);
708 setOperationAction(ISD::FNEARBYINT, VT, Expand);
709 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
710 setOperationAction(ISD::MULHS, VT, Expand);
711 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
712 setOperationAction(ISD::MULHU, VT, Expand);
713 setOperationAction(ISD::SDIVREM, VT, Expand);
714 setOperationAction(ISD::UDIVREM, VT, Expand);
715 setOperationAction(ISD::CTPOP, VT, Expand);
716 setOperationAction(ISD::CTTZ, VT, Expand);
717 setOperationAction(ISD::CTLZ, VT, Expand);
718 setOperationAction(ISD::ROTL, VT, Expand);
719 setOperationAction(ISD::ROTR, VT, Expand);
720 setOperationAction(ISD::BSWAP, VT, Expand);
721 setOperationAction(ISD::SETCC, VT, Expand);
722 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
723 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
724 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
725 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
726 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
727 setOperationAction(ISD::TRUNCATE, VT, Expand);
728 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
729 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
730 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
731 setOperationAction(ISD::SELECT_CC, VT, Expand);
732 for (MVT InnerVT : MVT::vector_valuetypes()) {
733 setTruncStoreAction(InnerVT, VT, Expand);
734
735 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
736 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
737
738 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
739 // types, we have to deal with them whether we ask for Expansion or not.
740 // Setting Expand causes its own optimisation problems though, so leave
741 // them legal.
742 if (VT.getVectorElementType() == MVT::i1)
743 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
744
745 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
746 // split/scalarized right now.
747 if (VT.getVectorElementType() == MVT::f16)
748 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
749 }
750 }
751
752 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
753 // with -msoft-float, disable use of MMX as well.
754 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
755 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
756 // No operations on x86mmx supported, everything uses intrinsics.
757 }
758
759 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
760 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
761 : &X86::VR128RegClass);
762
763 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
764 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
765 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
766 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
767 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
768 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
769 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
770 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
771 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
772 }
773
774 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
775 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
776 : &X86::VR128RegClass);
777
778 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
779 // registers cannot be used even for integer operations.
780 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
781 : &X86::VR128RegClass);
782 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
783 : &X86::VR128RegClass);
784 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
785 : &X86::VR128RegClass);
786 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
787 : &X86::VR128RegClass);
788
789 setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
790 setOperationAction(ISD::SREM, MVT::v2i32, Custom);
791 setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
792 setOperationAction(ISD::UREM, MVT::v2i32, Custom);
793
794 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
795 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
796 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
797 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
798 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
799 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
800 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
801 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
802 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
803 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
804 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
805 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
806 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
807
808 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
809 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
810 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
811 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
812 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
813 }
814
815 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
816 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
817 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
818
819 // Provide custom widening for v2f32 setcc. This is really for VLX when
820 // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
821 // type legalization changing the result type to v4i1 during widening.
822 // It works fine for SSE2 and is probably faster so no need to qualify with
823 // VLX support.
824 setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
825
826 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
827 setOperationAction(ISD::SETCC, VT, Custom);
828 setOperationAction(ISD::CTPOP, VT, Custom);
829
830 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
831 // setcc all the way to isel and prefer SETGT in some isel patterns.
832 setCondCodeAction(ISD::SETLT, VT, Custom);
833 setCondCodeAction(ISD::SETLE, VT, Custom);
834 }
835
836 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
837 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
838 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
839 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
840 setOperationAction(ISD::VSELECT, VT, Custom);
841 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
842 }
843
844 // We support custom legalizing of sext and anyext loads for specific
845 // memory vector types which we can load as a scalar (or sequence of
846 // scalars) and extend in-register to a legal 128-bit vector type. For sext
847 // loads these must work with a single scalar load.
848 for (MVT VT : MVT::integer_vector_valuetypes()) {
849 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
850 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
851 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
852 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
853 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
854 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
855 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
856 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
857 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
858 }
859
860 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
861 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
862 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
863 setOperationAction(ISD::VSELECT, VT, Custom);
864
865 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
866 continue;
867
868 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
869 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
870 }
871
872 // Custom lower v2f64, v2i64, v4i32, v8i16 and v16i8 selects.
873 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
874 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
875 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
876 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
877 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
878
879 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
880 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
881
882 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
883 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
884
885 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
886
887 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
888 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
889
890 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
891 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
892
893 for (MVT VT : MVT::fp_vector_valuetypes())
894 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
895
896 // We want to legalize this to an f64 load rather than an i64 load on
897 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
898 // store.
899 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
900 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
901
902 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
903 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
904 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
905 if (!Subtarget.hasAVX512())
906 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
907
908 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
909 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
910 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
911
912 // In the customized shift lowering, the legal v4i32/v2i64 cases
913 // in AVX2 will be recognized.
914 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
915 setOperationAction(ISD::SRL, VT, Custom);
916 setOperationAction(ISD::SHL, VT, Custom);
917 setOperationAction(ISD::SRA, VT, Custom);
918 }
919
920 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
921 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
922 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
923 }
924
925 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
926 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
927 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
928 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
929 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
930 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
931 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
932 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
933 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
934 }
935
936 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
937 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
938 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
939 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
940 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
941 setOperationAction(ISD::FRINT, RoundedTy, Legal);
942 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
943 }
944
945 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
946 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
947 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
948 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
949 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
950 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
951 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
952 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
953
954 // FIXME: Do we need to handle scalar-to-vector here?
955 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
956
957 // We directly match byte blends in the backend as they match the VSELECT
958 // condition form.
959 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
960
961 // SSE41 brings specific instructions for doing vector sign extend even in
962 // cases where we don't have SRA.
963 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
964 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
965 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
966 }
967
968 for (MVT VT : MVT::integer_vector_valuetypes()) {
969 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
970 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
971 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
972 }
973
974 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
975 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
976 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
977 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
978 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
979 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
980 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
981 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
982 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
983 }
984
985 // i8 vectors are custom because the source register and source
986 // memory operand types are not the same width.
987 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
988 }
989
990 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
991 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
992 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
993 setOperationAction(ISD::ROTL, VT, Custom);
994
995 // XOP can efficiently perform BITREVERSE with VPPERM.
996 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
997 setOperationAction(ISD::BITREVERSE, VT, Custom);
998
999 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1000 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1001 setOperationAction(ISD::BITREVERSE, VT, Custom);
1002 }
1003
1004 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1005 bool HasInt256 = Subtarget.hasInt256();
1006
1007 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1008 : &X86::VR256RegClass);
1009 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1010 : &X86::VR256RegClass);
1011 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1012 : &X86::VR256RegClass);
1013 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1014 : &X86::VR256RegClass);
1015 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1016 : &X86::VR256RegClass);
1017 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1018 : &X86::VR256RegClass);
1019
1020 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1021 setOperationAction(ISD::FFLOOR, VT, Legal);
1022 setOperationAction(ISD::FCEIL, VT, Legal);
1023 setOperationAction(ISD::FTRUNC, VT, Legal);
1024 setOperationAction(ISD::FRINT, VT, Legal);
1025 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1026 setOperationAction(ISD::FNEG, VT, Custom);
1027 setOperationAction(ISD::FABS, VT, Custom);
1028 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1029 }
1030
1031 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1032 // even though v8i16 is a legal type.
1033 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1034 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1035 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1036
1037 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1038 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1039
1040 if (!Subtarget.hasAVX512())
1041 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1042
1043 for (MVT VT : MVT::fp_vector_valuetypes())
1044 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1045
1046 // In the customized shift lowering, the legal v8i32/v4i64 cases
1047 // in AVX2 will be recognized.
1048 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1049 setOperationAction(ISD::SRL, VT, Custom);
1050 setOperationAction(ISD::SHL, VT, Custom);
1051 setOperationAction(ISD::SRA, VT, Custom);
1052 }
1053
1054 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1055 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1056 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1057
1058 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1059 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1060 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1061 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1062 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1063 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1064
1065 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1066 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1067 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1068 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1069 }
1070
1071 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1072 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1073 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1074 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1075
1076 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1077 setOperationAction(ISD::SETCC, VT, Custom);
1078 setOperationAction(ISD::CTPOP, VT, Custom);
1079 setOperationAction(ISD::CTLZ, VT, Custom);
1080
1081 // TODO - remove this once 256-bit X86ISD::ANDNP is correctly split.
1082 setOperationAction(ISD::CTTZ, VT, HasInt256 ? Expand : Custom);
1083
1084 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1085 // setcc all the way to isel and prefer SETGT in some isel patterns.
1086 setCondCodeAction(ISD::SETLT, VT, Custom);
1087 setCondCodeAction(ISD::SETLE, VT, Custom);
1088 }
1089
1090 if (Subtarget.hasAnyFMA()) {
1091 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1092 MVT::v2f64, MVT::v4f64 })
1093 setOperationAction(ISD::FMA, VT, Legal);
1094 }
1095
1096 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1097 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1098 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1099 }
1100
1101 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1102 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1103 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1104 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1105
1106 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1107 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1108 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1109 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1110 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1111 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1112
1113 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1114 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1115 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1116 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1117
1118 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1119 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1120 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1121 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1122 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1123 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1124 }
1125
1126 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1127 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1128 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1129 }
1130
1131 if (HasInt256) {
1132 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1133 // when we have a 256bit-wide blend with immediate.
1134 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1135
1136 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1137 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1138 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1139 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1140 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1141 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1142 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1143 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1144 }
1145 }
1146
1147 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1148 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1149 setOperationAction(ISD::MLOAD, VT, Legal);
1150 setOperationAction(ISD::MSTORE, VT, Legal);
1151 }
1152
1153 // Extract subvector is special because the value type
1154 // (result) is 128-bit but the source is 256-bit wide.
1155 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1156 MVT::v4f32, MVT::v2f64 }) {
1157 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1158 }
1159
1160 // Custom lower several nodes for 256-bit types.
1161 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1162 MVT::v8f32, MVT::v4f64 }) {
1163 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1164 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1165 setOperationAction(ISD::VSELECT, VT, Custom);
1166 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1167 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1168 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1169 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1170 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1171 }
1172
1173 if (HasInt256)
1174 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1175
1176 if (HasInt256) {
1177 // Custom legalize 2x32 to get a little better code.
1178 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1179 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1180
1181 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1182 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1183 setOperationAction(ISD::MGATHER, VT, Custom);
1184 }
1185 }
1186
1187 // This block controls legalization of the mask vector sizes that are
1188 // available with AVX512. 512-bit vectors are in a separate block controlled
1189 // by useAVX512Regs.
1190 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1191 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1192 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1193 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1194 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1195 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1196
1197 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1198 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1199 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1200
1201 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1202 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1203 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1204 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1205 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1206 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1207
1208 // There is no byte sized k-register load or store without AVX512DQ.
1209 if (!Subtarget.hasDQI()) {
1210 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1211 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1212 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1213 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1214
1215 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1216 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1217 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1218 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1219 }
1220
1221 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1222 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1223 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1224 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1225 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1226 }
1227
1228 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1229 setOperationAction(ISD::ADD, VT, Custom);
1230 setOperationAction(ISD::SUB, VT, Custom);
1231 setOperationAction(ISD::MUL, VT, Custom);
1232 setOperationAction(ISD::SETCC, VT, Custom);
1233 setOperationAction(ISD::SELECT, VT, Custom);
1234 setOperationAction(ISD::TRUNCATE, VT, Custom);
1235
1236 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1237 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1238 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1239 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1240 setOperationAction(ISD::VSELECT, VT, Expand);
1241 }
1242
1243 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1244 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1245 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1246 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
1247 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1248 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1249 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1250 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1251 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1252 }
1253
1254 // This block controls legalization for 512-bit operations with 32/64 bit
1255 // elements. 512-bits can be disabled based on prefer-vector-width and
1256 // required-vector-width function attributes.
1257 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1258 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1259 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1260 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1261 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1262
1263 for (MVT VT : MVT::fp_vector_valuetypes())
1264 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1265
1266 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1267 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1268 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1269 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1270 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1271 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1272 }
1273
1274 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1275 setOperationAction(ISD::FNEG, VT, Custom);
1276 setOperationAction(ISD::FABS, VT, Custom);
1277 setOperationAction(ISD::FMA, VT, Legal);
1278 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1279 }
1280
1281 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1282 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
1283 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
1284 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
1285 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1286 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
1287 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
1288 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
1289 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1290 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1291
1292 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1293 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1294 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1295 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1296 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1297
1298 if (!Subtarget.hasVLX()) {
1299 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1300 // to 512-bit rather than use the AVX2 instructions so that we can use
1301 // k-masks.
1302 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1303 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1304 setOperationAction(ISD::MLOAD, VT, Custom);
1305 setOperationAction(ISD::MSTORE, VT, Custom);
1306 }
1307 }
1308
1309 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1310 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1311 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1312 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1313 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1314 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1315 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1316 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1317
1318 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1319 setOperationAction(ISD::FFLOOR, VT, Legal);
1320 setOperationAction(ISD::FCEIL, VT, Legal);
1321 setOperationAction(ISD::FTRUNC, VT, Legal);
1322 setOperationAction(ISD::FRINT, VT, Legal);
1323 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1324 }
1325
1326 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
1327 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
1328
1329 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1330 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1331 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1332
1333 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1334 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1335 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1336 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1337
1338 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1339 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1340
1341 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1342 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1343
1344 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1345 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1346 setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
1347 setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
1348 setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
1349 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1350
1351 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1352 setOperationAction(ISD::SMAX, VT, Legal);
1353 setOperationAction(ISD::UMAX, VT, Legal);
1354 setOperationAction(ISD::SMIN, VT, Legal);
1355 setOperationAction(ISD::UMIN, VT, Legal);
1356 setOperationAction(ISD::ABS, VT, Legal);
1357 setOperationAction(ISD::SRL, VT, Custom);
1358 setOperationAction(ISD::SHL, VT, Custom);
1359 setOperationAction(ISD::SRA, VT, Custom);
1360 setOperationAction(ISD::CTPOP, VT, Custom);
1361 setOperationAction(ISD::ROTL, VT, Custom);
1362 setOperationAction(ISD::ROTR, VT, Custom);
1363 setOperationAction(ISD::SETCC, VT, Custom);
1364
1365 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1366 // setcc all the way to isel and prefer SETGT in some isel patterns.
1367 setCondCodeAction(ISD::SETLT, VT, Custom);
1368 setCondCodeAction(ISD::SETLE, VT, Custom);
1369 }
1370
1371 if (Subtarget.hasDQI()) {
1372 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1373 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1374 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1375 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1376
1377 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1378 }
1379
1380 if (Subtarget.hasCDI()) {
1381 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1382 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1383 setOperationAction(ISD::CTLZ, VT, Legal);
1384 }
1385 } // Subtarget.hasCDI()
1386
1387 if (Subtarget.hasVPOPCNTDQ()) {
1388 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1389 setOperationAction(ISD::CTPOP, VT, Legal);
1390 }
1391
1392 // Extract subvector is special because the value type
1393 // (result) is 256-bit but the source is 512-bit wide.
1394 // 128-bit was made Legal under AVX1.
1395 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1396 MVT::v8f32, MVT::v4f64 })
1397 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1398
1399 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1400 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1401 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1402 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1403 setOperationAction(ISD::VSELECT, VT, Custom);
1404 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1405 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1406 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1407 setOperationAction(ISD::MLOAD, VT, Legal);
1408 setOperationAction(ISD::MSTORE, VT, Legal);
1409 setOperationAction(ISD::MGATHER, VT, Custom);
1410 setOperationAction(ISD::MSCATTER, VT, Custom);
1411 }
1412 // Need to custom split v32i16/v64i8 bitcasts.
1413 if (!Subtarget.hasBWI()) {
1414 setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
1415 setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
1416 }
1417 }// has AVX-512
1418
1419 // This block controls legalization for operations that don't have
1420 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1421 // narrower widths.
1422 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1423 // These operations are handled on non-VLX by artificially widening in
1424 // isel patterns.
1425 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1426
1427 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1428 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1429 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1430 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1431 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1432
1433 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1434 setOperationAction(ISD::SMAX, VT, Legal);
1435 setOperationAction(ISD::UMAX, VT, Legal);
1436 setOperationAction(ISD::SMIN, VT, Legal);
1437 setOperationAction(ISD::UMIN, VT, Legal);
1438 setOperationAction(ISD::ABS, VT, Legal);
1439 }
1440
1441 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1442 setOperationAction(ISD::ROTL, VT, Custom);
1443 setOperationAction(ISD::ROTR, VT, Custom);
1444 }
1445
1446 // Custom legalize 2x32 to get a little better code.
1447 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1448 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1449
1450 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1451 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1452 setOperationAction(ISD::MSCATTER, VT, Custom);
1453
1454 if (Subtarget.hasDQI()) {
1455 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1456 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1457 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1458 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1459 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1460
1461 setOperationAction(ISD::MUL, VT, Legal);
1462 }
1463 }
1464
1465 if (Subtarget.hasCDI()) {
1466 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1467 setOperationAction(ISD::CTLZ, VT, Legal);
1468 }
1469 } // Subtarget.hasCDI()
1470
1471 if (Subtarget.hasVPOPCNTDQ()) {
1472 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1473 setOperationAction(ISD::CTPOP, VT, Legal);
1474 }
1475 }
1476
1477 // This block control legalization of v32i1/v64i1 which are available with
1478 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1479 // useBWIRegs.
1480 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1481 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1482 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1483
1484 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1485 setOperationAction(ISD::ADD, VT, Custom);
1486 setOperationAction(ISD::SUB, VT, Custom);
1487 setOperationAction(ISD::MUL, VT, Custom);
1488 setOperationAction(ISD::VSELECT, VT, Expand);
1489
1490 setOperationAction(ISD::TRUNCATE, VT, Custom);
1491 setOperationAction(ISD::SETCC, VT, Custom);
1492 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1493 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1494 setOperationAction(ISD::SELECT, VT, Custom);
1495 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1496 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1497 }
1498
1499 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1500 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1501 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1502 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1503 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1504 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1505
1506 // Extends from v32i1 masks to 256-bit vectors.
1507 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1508 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1509 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1510 }
1511
1512 // This block controls legalization for v32i16 and v64i8. 512-bits can be
1513 // disabled based on prefer-vector-width and required-vector-width function
1514 // attributes.
1515 if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1516 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1517 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1518
1519 // Extends from v64i1 masks to 512-bit vectors.
1520 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1521 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1522 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1523
1524 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1525 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1526 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1527 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1528 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1529 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1530 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1531 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1532 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1533 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1534 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1535 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1536 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1537 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1538 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1539 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1540 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1541 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1542 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1543 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1544 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1545 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1546 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1547
1548 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1549
1550 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1551
1552 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1553 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1554 setOperationAction(ISD::VSELECT, VT, Custom);
1555 setOperationAction(ISD::ABS, VT, Legal);
1556 setOperationAction(ISD::SRL, VT, Custom);
1557 setOperationAction(ISD::SHL, VT, Custom);
1558 setOperationAction(ISD::SRA, VT, Custom);
1559 setOperationAction(ISD::MLOAD, VT, Legal);
1560 setOperationAction(ISD::MSTORE, VT, Legal);
1561 setOperationAction(ISD::CTPOP, VT, Custom);
1562 setOperationAction(ISD::CTLZ, VT, Custom);
1563 setOperationAction(ISD::SMAX, VT, Legal);
1564 setOperationAction(ISD::UMAX, VT, Legal);
1565 setOperationAction(ISD::SMIN, VT, Legal);
1566 setOperationAction(ISD::UMIN, VT, Legal);
1567 setOperationAction(ISD::SETCC, VT, Custom);
1568
1569 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1570 // setcc all the way to isel and prefer SETGT in some isel patterns.
1571 setCondCodeAction(ISD::SETLT, VT, Custom);
1572 setCondCodeAction(ISD::SETLE, VT, Custom);
1573 }
1574
1575 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1576 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1577 }
1578
1579 if (Subtarget.hasBITALG()) {
1580 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1581 setOperationAction(ISD::CTPOP, VT, Legal);
1582 }
1583 }
1584
1585 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1586 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1587 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1588 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1589 }
1590
1591 // These operations are handled on non-VLX by artificially widening in
1592 // isel patterns.
1593 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1594
1595 if (Subtarget.hasBITALG()) {
1596 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1597 setOperationAction(ISD::CTPOP, VT, Legal);
1598 }
1599 }
1600
1601 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1602 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1603 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1604 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1605 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1606 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1607
1608 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1609 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1610 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1611 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1612 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1613
1614 if (Subtarget.hasDQI()) {
1615 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1616 // v2f32 UINT_TO_FP is already custom under SSE2.
1617 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1618 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 1619, __PRETTY_FUNCTION__))
1619 "Unexpected operation action!")((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 1619, __PRETTY_FUNCTION__))
;
1620 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1621 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1622 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1623 }
1624
1625 if (Subtarget.hasBWI()) {
1626 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1627 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1628 }
1629 }
1630
1631 // We want to custom lower some of our intrinsics.
1632 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1633 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1634 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1635 if (!Subtarget.is64Bit()) {
1636 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1637 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1638 }
1639
1640 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1641 // handle type legalization for these operations here.
1642 //
1643 // FIXME: We really should do custom legalization for addition and
1644 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1645 // than generic legalization for 64-bit multiplication-with-overflow, though.
1646 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1647 if (VT == MVT::i64 && !Subtarget.is64Bit())
1648 continue;
1649 // Add/Sub/Mul with overflow operations are custom lowered.
1650 setOperationAction(ISD::SADDO, VT, Custom);
1651 setOperationAction(ISD::UADDO, VT, Custom);
1652 setOperationAction(ISD::SSUBO, VT, Custom);
1653 setOperationAction(ISD::USUBO, VT, Custom);
1654 setOperationAction(ISD::SMULO, VT, Custom);
1655 setOperationAction(ISD::UMULO, VT, Custom);
1656
1657 // Support carry in as value rather than glue.
1658 setOperationAction(ISD::ADDCARRY, VT, Custom);
1659 setOperationAction(ISD::SUBCARRY, VT, Custom);
1660 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1661 }
1662
1663 if (!Subtarget.is64Bit()) {
1664 // These libcalls are not available in 32-bit.
1665 setLibcallName(RTLIB::SHL_I128, nullptr);
1666 setLibcallName(RTLIB::SRL_I128, nullptr);
1667 setLibcallName(RTLIB::SRA_I128, nullptr);
1668 setLibcallName(RTLIB::MUL_I128, nullptr);
1669 }
1670
1671 // Combine sin / cos into _sincos_stret if it is available.
1672 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1673 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1674 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1675 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1676 }
1677
1678 if (Subtarget.isTargetWin64()) {
1679 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1680 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1681 setOperationAction(ISD::SREM, MVT::i128, Custom);
1682 setOperationAction(ISD::UREM, MVT::i128, Custom);
1683 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1684 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1685 }
1686
1687 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1688 // is. We should promote the value to 64-bits to solve this.
1689 // This is what the CRT headers do - `fmodf` is an inline header
1690 // function casting to f64 and calling `fmod`.
1691 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1692 Subtarget.isTargetWindowsItanium()))
1693 for (ISD::NodeType Op :
1694 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1695 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1696 if (isOperationExpand(Op, MVT::f32))
1697 setOperationAction(Op, MVT::f32, Promote);
1698
1699 // We have target-specific dag combine patterns for the following nodes:
1700 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1701 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1702 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1703 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1704 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1705 setTargetDAGCombine(ISD::BITCAST);
1706 setTargetDAGCombine(ISD::VSELECT);
1707 setTargetDAGCombine(ISD::SELECT);
1708 setTargetDAGCombine(ISD::SHL);
1709 setTargetDAGCombine(ISD::SRA);
1710 setTargetDAGCombine(ISD::SRL);
1711 setTargetDAGCombine(ISD::OR);
1712 setTargetDAGCombine(ISD::AND);
1713 setTargetDAGCombine(ISD::ADD);
1714 setTargetDAGCombine(ISD::FADD);
1715 setTargetDAGCombine(ISD::FSUB);
1716 setTargetDAGCombine(ISD::FNEG);
1717 setTargetDAGCombine(ISD::FMA);
1718 setTargetDAGCombine(ISD::FMINNUM);
1719 setTargetDAGCombine(ISD::FMAXNUM);
1720 setTargetDAGCombine(ISD::SUB);
1721 setTargetDAGCombine(ISD::LOAD);
1722 setTargetDAGCombine(ISD::MLOAD);
1723 setTargetDAGCombine(ISD::STORE);
1724 setTargetDAGCombine(ISD::MSTORE);
1725 setTargetDAGCombine(ISD::TRUNCATE);
1726 setTargetDAGCombine(ISD::ZERO_EXTEND);
1727 setTargetDAGCombine(ISD::ANY_EXTEND);
1728 setTargetDAGCombine(ISD::SIGN_EXTEND);
1729 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1730 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1731 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1732 setTargetDAGCombine(ISD::SINT_TO_FP);
1733 setTargetDAGCombine(ISD::UINT_TO_FP);
1734 setTargetDAGCombine(ISD::SETCC);
1735 setTargetDAGCombine(ISD::MUL);
1736 setTargetDAGCombine(ISD::XOR);
1737 setTargetDAGCombine(ISD::MSCATTER);
1738 setTargetDAGCombine(ISD::MGATHER);
1739
1740 computeRegisterProperties(Subtarget.getRegisterInfo());
1741
1742 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1743 MaxStoresPerMemsetOptSize = 8;
1744 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1745 MaxStoresPerMemcpyOptSize = 4;
1746 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1747 MaxStoresPerMemmoveOptSize = 4;
1748
1749 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1750 // that needs to benchmarked and balanced with the potential use of vector
1751 // load/store types (PR33329, PR33914).
1752 MaxLoadsPerMemcmp = 2;
1753 MaxLoadsPerMemcmpOptSize = 2;
1754
1755 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1756 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1757
1758 // An out-of-order CPU can speculatively execute past a predictable branch,
1759 // but a conditional move could be stalled by an expensive earlier operation.
1760 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1761 EnableExtLdPromotion = true;
1762 setPrefFunctionAlignment(4); // 2^4 bytes.
1763
1764 verifyIntrinsicTables();
1765}
1766
1767// This has so far only been implemented for 64-bit MachO.
1768bool X86TargetLowering::useLoadStackGuardNode() const {
1769 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1770}
1771
1772bool X86TargetLowering::useStackGuardXorFP() const {
1773 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
1774 return Subtarget.getTargetTriple().isOSMSVCRT();
1775}
1776
1777SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1778 const SDLoc &DL) const {
1779 EVT PtrTy = getPointerTy(DAG.getDataLayout());
1780 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
1781 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
1782 return SDValue(Node, 0);
1783}
1784
1785TargetLoweringBase::LegalizeTypeAction
1786X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1787 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1788 return TypeSplitVector;
1789
1790 if (ExperimentalVectorWideningLegalization &&
1791 VT.getVectorNumElements() != 1 &&
1792 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1793 return TypeWidenVector;
1794
1795 return TargetLoweringBase::getPreferredVectorAction(VT);
1796}
1797
1798MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1799 CallingConv::ID CC,
1800 EVT VT) const {
1801 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1802 return MVT::v32i8;
1803 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1804}
1805
1806unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1807 CallingConv::ID CC,
1808 EVT VT) const {
1809 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1810 return 1;
1811 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1812}
1813
1814EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1815 LLVMContext& Context,
1816 EVT VT) const {
1817 if (!VT.isVector())
1818 return MVT::i8;
1819
1820 if (Subtarget.hasAVX512()) {
1821 const unsigned NumElts = VT.getVectorNumElements();
1822
1823 // Figure out what this type will be legalized to.
1824 EVT LegalVT = VT;
1825 while (getTypeAction(Context, LegalVT) != TypeLegal)
1826 LegalVT = getTypeToTransformTo(Context, LegalVT);
1827
1828 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
1829 if (LegalVT.getSimpleVT().is512BitVector())
1830 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1831
1832 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
1833 // If we legalized to less than a 512-bit vector, then we will use a vXi1
1834 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
1835 // vXi16/vXi8.
1836 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
1837 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
1838 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1839 }
1840 }
1841
1842 return VT.changeVectorElementTypeToInteger();
1843}
1844
1845/// Helper for getByValTypeAlignment to determine
1846/// the desired ByVal argument alignment.
1847static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1848 if (MaxAlign == 16)
1849 return;
1850 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1851 if (VTy->getBitWidth() == 128)
1852 MaxAlign = 16;
1853 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1854 unsigned EltAlign = 0;
1855 getMaxByValAlign(ATy->getElementType(), EltAlign);
1856 if (EltAlign > MaxAlign)
1857 MaxAlign = EltAlign;
1858 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1859 for (auto *EltTy : STy->elements()) {
1860 unsigned EltAlign = 0;
1861 getMaxByValAlign(EltTy, EltAlign);
1862 if (EltAlign > MaxAlign)
1863 MaxAlign = EltAlign;
1864 if (MaxAlign == 16)
1865 break;
1866 }
1867 }
1868}
1869
1870/// Return the desired alignment for ByVal aggregate
1871/// function arguments in the caller parameter area. For X86, aggregates
1872/// that contain SSE vectors are placed at 16-byte boundaries while the rest
1873/// are at 4-byte boundaries.
1874unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1875 const DataLayout &DL) const {
1876 if (Subtarget.is64Bit()) {
1877 // Max of 8 and alignment of type.
1878 unsigned TyAlign = DL.getABITypeAlignment(Ty);
1879 if (TyAlign > 8)
1880 return TyAlign;
1881 return 8;
1882 }
1883
1884 unsigned Align = 4;
1885 if (Subtarget.hasSSE1())
1886 getMaxByValAlign(Ty, Align);
1887 return Align;
1888}
1889
1890/// Returns the target specific optimal type for load
1891/// and store operations as a result of memset, memcpy, and memmove
1892/// lowering. If DstAlign is zero that means it's safe to destination
1893/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1894/// means there isn't a need to check it against alignment requirement,
1895/// probably because the source does not need to be loaded. If 'IsMemset' is
1896/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1897/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1898/// source is constant so it does not need to be loaded.
1899/// It returns EVT::Other if the type should be determined using generic
1900/// target-independent logic.
1901EVT
1902X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1903 unsigned DstAlign, unsigned SrcAlign,
1904 bool IsMemset, bool ZeroMemset,
1905 bool MemcpyStrSrc,
1906 MachineFunction &MF) const {
1907 const Function &F = MF.getFunction();
1908 if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) {
1909 if (Size >= 16 &&
1910 (!Subtarget.isUnalignedMem16Slow() ||
1911 ((DstAlign == 0 || DstAlign >= 16) &&
1912 (SrcAlign == 0 || SrcAlign >= 16)))) {
1913 // FIXME: Check if unaligned 32-byte accesses are slow.
1914 if (Size >= 32 && Subtarget.hasAVX()) {
1915 // Although this isn't a well-supported type for AVX1, we'll let
1916 // legalization and shuffle lowering produce the optimal codegen. If we
1917 // choose an optimal type with a vector element larger than a byte,
1918 // getMemsetStores() may create an intermediate splat (using an integer
1919 // multiply) before we splat as a vector.
1920 return MVT::v32i8;
1921 }
1922 if (Subtarget.hasSSE2())
1923 return MVT::v16i8;
1924 // TODO: Can SSE1 handle a byte vector?
1925 // If we have SSE1 registers we should be able to use them.
1926 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()))
1927 return MVT::v4f32;
1928 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
1929 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
1930 // Do not use f64 to lower memcpy if source is string constant. It's
1931 // better to use i32 to avoid the loads.
1932 // Also, do not use f64 to lower memset unless this is a memset of zeros.
1933 // The gymnastics of splatting a byte value into an XMM register and then
1934 // only using 8-byte stores (because this is a CPU with slow unaligned
1935 // 16-byte accesses) makes that a loser.
1936 return MVT::f64;
1937 }
1938 }
1939 // This is a compromise. If we reach here, unaligned accesses may be slow on
1940 // this target. However, creating smaller, aligned accesses could be even
1941 // slower and would certainly be a lot more code.
1942 if (Subtarget.is64Bit() && Size >= 8)
1943 return MVT::i64;
1944 return MVT::i32;
1945}
1946
1947bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1948 if (VT == MVT::f32)
1949 return X86ScalarSSEf32;
1950 else if (VT == MVT::f64)
1951 return X86ScalarSSEf64;
1952 return true;
1953}
1954
1955bool
1956X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1957 unsigned,
1958 unsigned,
1959 bool *Fast) const {
1960 if (Fast) {
1961 switch (VT.getSizeInBits()) {
1962 default:
1963 // 8-byte and under are always assumed to be fast.
1964 *Fast = true;
1965 break;
1966 case 128:
1967 *Fast = !Subtarget.isUnalignedMem16Slow();
1968 break;
1969 case 256:
1970 *Fast = !Subtarget.isUnalignedMem32Slow();
1971 break;
1972 // TODO: What about AVX-512 (512-bit) accesses?
1973 }
1974 }
1975 // Misaligned accesses of any size are always allowed.
1976 return true;
1977}
1978
1979/// Return the entry encoding for a jump table in the
1980/// current function. The returned value is a member of the
1981/// MachineJumpTableInfo::JTEntryKind enum.
1982unsigned X86TargetLowering::getJumpTableEncoding() const {
1983 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1984 // symbol.
1985 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
1986 return MachineJumpTableInfo::EK_Custom32;
1987
1988 // Otherwise, use the normal jump table encoding heuristics.
1989 return TargetLowering::getJumpTableEncoding();
1990}
1991
1992bool X86TargetLowering::useSoftFloat() const {
1993 return Subtarget.useSoftFloat();
1994}
1995
1996void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
1997 ArgListTy &Args) const {
1998
1999 // Only relabel X86-32 for C / Stdcall CCs.
2000 if (Subtarget.is64Bit())
2001 return;
2002 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2003 return;
2004 unsigned ParamRegs = 0;
2005 if (auto *M = MF->getFunction().getParent())
2006 ParamRegs = M->getNumberRegisterParameters();
2007
2008 // Mark the first N int arguments as having reg
2009 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2010 Type *T = Args[Idx].Ty;
2011 if (T->isIntOrPtrTy())
2012 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2013 unsigned numRegs = 1;
2014 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2015 numRegs = 2;
2016 if (ParamRegs < numRegs)
2017 return;
2018 ParamRegs -= numRegs;
2019 Args[Idx].IsInReg = true;
2020 }
2021 }
2022}
2023
2024const MCExpr *
2025X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2026 const MachineBasicBlock *MBB,
2027 unsigned uid,MCContext &Ctx) const{
2028 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2028, __PRETTY_FUNCTION__))
;
2029 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2030 // entries.
2031 return MCSymbolRefExpr::create(MBB->getSymbol(),
2032 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2033}
2034
2035/// Returns relocation base for the given PIC jumptable.
2036SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2037 SelectionDAG &DAG) const {
2038 if (!Subtarget.is64Bit())
2039 // This doesn't have SDLoc associated with it, but is not really the
2040 // same as a Register.
2041 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2042 getPointerTy(DAG.getDataLayout()));
2043 return Table;
2044}
2045
2046/// This returns the relocation base for the given PIC jumptable,
2047/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2048const MCExpr *X86TargetLowering::
2049getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2050 MCContext &Ctx) const {
2051 // X86-64 uses RIP relative addressing based on the jump table label.
2052 if (Subtarget.isPICStyleRIPRel())
2053 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2054
2055 // Otherwise, the reference is relative to the PIC base.
2056 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2057}
2058
2059std::pair<const TargetRegisterClass *, uint8_t>
2060X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2061 MVT VT) const {
2062 const TargetRegisterClass *RRC = nullptr;
2063 uint8_t Cost = 1;
2064 switch (VT.SimpleTy) {
2065 default:
2066 return TargetLowering::findRepresentativeClass(TRI, VT);
2067 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2068 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2069 break;
2070 case MVT::x86mmx:
2071 RRC = &X86::VR64RegClass;
2072 break;
2073 case MVT::f32: case MVT::f64:
2074 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2075 case MVT::v4f32: case MVT::v2f64:
2076 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2077 case MVT::v8f32: case MVT::v4f64:
2078 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2079 case MVT::v16f32: case MVT::v8f64:
2080 RRC = &X86::VR128XRegClass;
2081 break;
2082 }
2083 return std::make_pair(RRC, Cost);
2084}
2085
2086unsigned X86TargetLowering::getAddressSpace() const {
2087 if (Subtarget.is64Bit())
2088 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2089 return 256;
2090}
2091
2092static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2093 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2094 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2095}
2096
2097static Constant* SegmentOffset(IRBuilder<> &IRB,
2098 unsigned Offset, unsigned AddressSpace) {
2099 return ConstantExpr::getIntToPtr(
2100 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2101 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2102}
2103
2104Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2105 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2106 // tcbhead_t; use it instead of the usual global variable (see
2107 // sysdeps/{i386,x86_64}/nptl/tls.h)
2108 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2109 if (Subtarget.isTargetFuchsia()) {
2110 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2111 return SegmentOffset(IRB, 0x10, getAddressSpace());
2112 } else {
2113 // %fs:0x28, unless we're using a Kernel code model, in which case
2114 // it's %gs:0x28. gs:0x14 on i386.
2115 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2116 return SegmentOffset(IRB, Offset, getAddressSpace());
2117 }
2118 }
2119
2120 return TargetLowering::getIRStackGuard(IRB);
2121}
2122
2123void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2124 // MSVC CRT provides functionalities for stack protection.
2125 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2126 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2127 // MSVC CRT has a global variable holding security cookie.
2128 M.getOrInsertGlobal("__security_cookie",
2129 Type::getInt8PtrTy(M.getContext()));
2130
2131 // MSVC CRT has a function to validate security cookie.
2132 auto *SecurityCheckCookie = cast<Function>(
2133 M.getOrInsertFunction("__security_check_cookie",
2134 Type::getVoidTy(M.getContext()),
2135 Type::getInt8PtrTy(M.getContext())));
2136 SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
2137 SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
2138 return;
2139 }
2140 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2141 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2142 return;
2143 TargetLowering::insertSSPDeclarations(M);
2144}
2145
2146Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2147 // MSVC CRT has a global variable holding security cookie.
2148 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2149 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2150 return M.getGlobalVariable("__security_cookie");
2151 }
2152 return TargetLowering::getSDagStackGuard(M);
2153}
2154
2155Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2156 // MSVC CRT has a function to validate security cookie.
2157 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2158 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2159 return M.getFunction("__security_check_cookie");
2160 }
2161 return TargetLowering::getSSPStackGuardCheck(M);
2162}
2163
2164Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2165 if (Subtarget.getTargetTriple().isOSContiki())
2166 return getDefaultSafeStackPointerLocation(IRB, false);
2167
2168 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2169 // definition of TLS_SLOT_SAFESTACK in
2170 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2171 if (Subtarget.isTargetAndroid()) {
2172 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2173 // %gs:0x24 on i386
2174 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2175 return SegmentOffset(IRB, Offset, getAddressSpace());
2176 }
2177
2178 // Fuchsia is similar.
2179 if (Subtarget.isTargetFuchsia()) {
2180 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2181 return SegmentOffset(IRB, 0x18, getAddressSpace());
2182 }
2183
2184 return TargetLowering::getSafeStackPointerLocation(IRB);
2185}
2186
2187bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2188 unsigned DestAS) const {
2189 assert(SrcAS != DestAS && "Expected different address spaces!")((SrcAS != DestAS && "Expected different address spaces!"
) ? static_cast<void> (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2189, __PRETTY_FUNCTION__))
;
2190
2191 return SrcAS < 256 && DestAS < 256;
2192}
2193
2194//===----------------------------------------------------------------------===//
2195// Return Value Calling Convention Implementation
2196//===----------------------------------------------------------------------===//
2197
2198#include "X86GenCallingConv.inc"
2199
2200bool X86TargetLowering::CanLowerReturn(
2201 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2202 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2203 SmallVector<CCValAssign, 16> RVLocs;
2204 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2205 return CCInfo.CheckReturn(Outs, RetCC_X86);
2206}
2207
2208const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2209 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2210 return ScratchRegs;
2211}
2212
2213/// Lowers masks values (v*i1) to the local register values
2214/// \returns DAG node after lowering to register type
2215static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2216 const SDLoc &Dl, SelectionDAG &DAG) {
2217 EVT ValVT = ValArg.getValueType();
2218
2219 if (ValVT == MVT::v1i1)
2220 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2221 DAG.getIntPtrConstant(0, Dl));
2222
2223 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2224 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2225 // Two stage lowering might be required
2226 // bitcast: v8i1 -> i8 / v16i1 -> i16
2227 // anyextend: i8 -> i32 / i16 -> i32
2228 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2229 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2230 if (ValLoc == MVT::i32)
2231 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2232 return ValToCopy;
2233 }
2234
2235 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2236 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2237 // One stage lowering is required
2238 // bitcast: v32i1 -> i32 / v64i1 -> i64
2239 return DAG.getBitcast(ValLoc, ValArg);
2240 }
2241
2242 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2243}
2244
2245/// Breaks v64i1 value into two registers and adds the new node to the DAG
2246static void Passv64i1ArgInRegs(
2247 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2248 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2249 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2250 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")((Subtarget.hasBWI() && "Expected AVX512BW target!") ?
static_cast<void> (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2250, __PRETTY_FUNCTION__))
;
2251 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2251, __PRETTY_FUNCTION__))
;
2252 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2252, __PRETTY_FUNCTION__))
;
2253 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2254, __PRETTY_FUNCTION__))
2254 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2254, __PRETTY_FUNCTION__))
;
2255
2256 // Before splitting the value we cast it to i64
2257 Arg = DAG.getBitcast(MVT::i64, Arg);
2258
2259 // Splitting the value into two i32 types
2260 SDValue Lo, Hi;
2261 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2262 DAG.getConstant(0, Dl, MVT::i32));
2263 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2264 DAG.getConstant(1, Dl, MVT::i32));
2265
2266 // Attach the two i32 types into corresponding registers
2267 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2268 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2269}
2270
2271SDValue
2272X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2273 bool isVarArg,
2274 const SmallVectorImpl<ISD::OutputArg> &Outs,
2275 const SmallVectorImpl<SDValue> &OutVals,
2276 const SDLoc &dl, SelectionDAG &DAG) const {
2277 MachineFunction &MF = DAG.getMachineFunction();
2278 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2279
2280 // In some cases we need to disable registers from the default CSR list.
2281 // For example, when they are used for argument passing.
2282 bool ShouldDisableCalleeSavedRegister =
2283 CallConv == CallingConv::X86_RegCall ||
2284 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2285
2286 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2287 report_fatal_error("X86 interrupts may not return any value");
2288
2289 SmallVector<CCValAssign, 16> RVLocs;
2290 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2291 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2292
2293 SDValue Flag;
2294 SmallVector<SDValue, 6> RetOps;
2295 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2296 // Operand #1 = Bytes To Pop
2297 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2298 MVT::i32));
2299
2300 // Copy the result values into the output registers.
2301 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2302 ++I, ++OutsIndex) {
2303 CCValAssign &VA = RVLocs[I];
2304 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2304, __PRETTY_FUNCTION__))
;
2305
2306 // Add the register to the CalleeSaveDisableRegs list.
2307 if (ShouldDisableCalleeSavedRegister)
2308 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2309
2310 SDValue ValToCopy = OutVals[OutsIndex];
2311 EVT ValVT = ValToCopy.getValueType();
2312
2313 // Promote values to the appropriate types.
2314 if (VA.getLocInfo() == CCValAssign::SExt)
2315 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2316 else if (VA.getLocInfo() == CCValAssign::ZExt)
2317 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2318 else if (VA.getLocInfo() == CCValAssign::AExt) {
2319 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2320 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2321 else
2322 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2323 }
2324 else if (VA.getLocInfo() == CCValAssign::BCvt)
2325 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2326
2327 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2328, __PRETTY_FUNCTION__))
2328 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2328, __PRETTY_FUNCTION__))
;
2329
2330 // If this is x86-64, and we disabled SSE, we can't return FP values,
2331 // or SSE or MMX vectors.
2332 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2333 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2334 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2335 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2336 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2337 } else if (ValVT == MVT::f64 &&
2338 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2339 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2340 // llvm-gcc has never done it right and no one has noticed, so this
2341 // should be OK for now.
2342 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2343 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2344 }
2345
2346 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2347 // the RET instruction and handled by the FP Stackifier.
2348 if (VA.getLocReg() == X86::FP0 ||
2349 VA.getLocReg() == X86::FP1) {
2350 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2351 // change the value to the FP stack register class.
2352 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2353 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2354 RetOps.push_back(ValToCopy);
2355 // Don't emit a copytoreg.
2356 continue;
2357 }
2358
2359 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2360 // which is returned in RAX / RDX.
2361 if (Subtarget.is64Bit()) {
2362 if (ValVT == MVT::x86mmx) {
2363 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2364 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2365 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2366 ValToCopy);
2367 // If we don't have SSE2 available, convert to v4f32 so the generated
2368 // register is legal.
2369 if (!Subtarget.hasSSE2())
2370 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2371 }
2372 }
2373 }
2374
2375 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2376
2377 if (VA.needsCustom()) {
2378 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2379, __PRETTY_FUNCTION__))
2379 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2379, __PRETTY_FUNCTION__))
;
2380
2381 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2382 Subtarget);
2383
2384 assert(2 == RegsToPass.size() &&((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2385, __PRETTY_FUNCTION__))
2385 "Expecting two registers after Pass64BitArgInRegs")((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2385, __PRETTY_FUNCTION__))
;
2386
2387 // Add the second register to the CalleeSaveDisableRegs list.
2388 if (ShouldDisableCalleeSavedRegister)
2389 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2390 } else {
2391 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2392 }
2393
2394 // Add nodes to the DAG and add the values into the RetOps list
2395 for (auto &Reg : RegsToPass) {
2396 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2397 Flag = Chain.getValue(1);
2398 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2399 }
2400 }
2401
2402 // Swift calling convention does not require we copy the sret argument
2403 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2404
2405 // All x86 ABIs require that for returning structs by value we copy
2406 // the sret argument into %rax/%eax (depending on ABI) for the return.
2407 // We saved the argument into a virtual register in the entry block,
2408 // so now we copy the value out and into %rax/%eax.
2409 //
2410 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2411 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2412 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2413 // either case FuncInfo->setSRetReturnReg() will have been called.
2414 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2415 // When we have both sret and another return value, we should use the
2416 // original Chain stored in RetOps[0], instead of the current Chain updated
2417 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2418
2419 // For the case of sret and another return value, we have
2420 // Chain_0 at the function entry
2421 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2422 // If we use Chain_1 in getCopyFromReg, we will have
2423 // Val = getCopyFromReg(Chain_1)
2424 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2425
2426 // getCopyToReg(Chain_0) will be glued together with
2427 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2428 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2429 // Data dependency from Unit B to Unit A due to usage of Val in
2430 // getCopyToReg(Chain_1, Val)
2431 // Chain dependency from Unit A to Unit B
2432
2433 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2434 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2435 getPointerTy(MF.getDataLayout()));
2436
2437 unsigned RetValReg
2438 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2439 X86::RAX : X86::EAX;
2440 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2441 Flag = Chain.getValue(1);
2442
2443 // RAX/EAX now acts like a return value.
2444 RetOps.push_back(
2445 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2446
2447 // Add the returned register to the CalleeSaveDisableRegs list.
2448 if (ShouldDisableCalleeSavedRegister)
2449 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2450 }
2451
2452 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2453 const MCPhysReg *I =
2454 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2455 if (I) {
2456 for (; *I; ++I) {
2457 if (X86::GR64RegClass.contains(*I))
2458 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2459 else
2460 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2460)
;
2461 }
2462 }
2463
2464 RetOps[0] = Chain; // Update chain.
2465
2466 // Add the flag if we have it.
2467 if (Flag.getNode())
2468 RetOps.push_back(Flag);
2469
2470 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2471 if (CallConv == CallingConv::X86_INTR)
2472 opcode = X86ISD::IRET;
2473 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2474}
2475
2476bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2477 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2478 return false;
2479
2480 SDValue TCChain = Chain;
2481 SDNode *Copy = *N->use_begin();
2482 if (Copy->getOpcode() == ISD::CopyToReg) {
2483 // If the copy has a glue operand, we conservatively assume it isn't safe to
2484 // perform a tail call.
2485 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2486 return false;
2487 TCChain = Copy->getOperand(0);
2488 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2489 return false;
2490
2491 bool HasRet = false;
2492 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2493 UI != UE; ++UI) {
2494 if (UI->getOpcode() != X86ISD::RET_FLAG)
2495 return false;
2496 // If we are returning more than one value, we can definitely
2497 // not make a tail call see PR19530
2498 if (UI->getNumOperands() > 4)
2499 return false;
2500 if (UI->getNumOperands() == 4 &&
2501 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2502 return false;
2503 HasRet = true;
2504 }
2505
2506 if (!HasRet)
2507 return false;
2508
2509 Chain = TCChain;
2510 return true;
2511}
2512
2513EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2514 ISD::NodeType ExtendKind) const {
2515 MVT ReturnMVT = MVT::i32;
2516
2517 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2518 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2519 // The ABI does not require i1, i8 or i16 to be extended.
2520 //
2521 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2522 // always extending i8/i16 return values, so keep doing that for now.
2523 // (PR26665).
2524 ReturnMVT = MVT::i8;
2525 }
2526
2527 EVT MinVT = getRegisterType(Context, ReturnMVT);
2528 return VT.bitsLT(MinVT) ? MinVT : VT;
2529}
2530
2531/// Reads two 32 bit registers and creates a 64 bit mask value.
2532/// \param VA The current 32 bit value that need to be assigned.
2533/// \param NextVA The next 32 bit value that need to be assigned.
2534/// \param Root The parent DAG node.
2535/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2536/// glue purposes. In the case the DAG is already using
2537/// physical register instead of virtual, we should glue
2538/// our new SDValue to InFlag SDvalue.
2539/// \return a new SDvalue of size 64bit.
2540static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2541 SDValue &Root, SelectionDAG &DAG,
2542 const SDLoc &Dl, const X86Subtarget &Subtarget,
2543 SDValue *InFlag = nullptr) {
2544 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2544, __PRETTY_FUNCTION__))
;
2545 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2545, __PRETTY_FUNCTION__))
;
2546 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2547, __PRETTY_FUNCTION__))
2547 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2547, __PRETTY_FUNCTION__))
;
2548 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2549, __PRETTY_FUNCTION__))
2549 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2549, __PRETTY_FUNCTION__))
;
2550 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2551, __PRETTY_FUNCTION__))
2551 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2551, __PRETTY_FUNCTION__))
;
2552
2553 SDValue Lo, Hi;
2554 unsigned Reg;
2555 SDValue ArgValueLo, ArgValueHi;
2556
2557 MachineFunction &MF = DAG.getMachineFunction();
2558 const TargetRegisterClass *RC = &X86::GR32RegClass;
2559
2560 // Read a 32 bit value from the registers.
2561 if (nullptr == InFlag) {
2562 // When no physical register is present,
2563 // create an intermediate virtual register.
2564 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2565 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2566 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2567 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2568 } else {
2569 // When a physical register is available read the value from it and glue
2570 // the reads together.
2571 ArgValueLo =
2572 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2573 *InFlag = ArgValueLo.getValue(2);
2574 ArgValueHi =
2575 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2576 *InFlag = ArgValueHi.getValue(2);
2577 }
2578
2579 // Convert the i32 type into v32i1 type.
2580 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2581
2582 // Convert the i32 type into v32i1 type.
2583 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2584
2585 // Concatenate the two values together.
2586 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2587}
2588
2589/// The function will lower a register of various sizes (8/16/32/64)
2590/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2591/// \returns a DAG node contains the operand after lowering to mask type.
2592static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2593 const EVT &ValLoc, const SDLoc &Dl,
2594 SelectionDAG &DAG) {
2595 SDValue ValReturned = ValArg;
2596
2597 if (ValVT == MVT::v1i1)
2598 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2599
2600 if (ValVT == MVT::v64i1) {
2601 // In 32 bit machine, this case is handled by getv64i1Argument
2602 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2602, __PRETTY_FUNCTION__))
;
2603 // In 64 bit machine, There is no need to truncate the value only bitcast
2604 } else {
2605 MVT maskLen;
2606 switch (ValVT.getSimpleVT().SimpleTy) {
2607 case MVT::v8i1:
2608 maskLen = MVT::i8;
2609 break;
2610 case MVT::v16i1:
2611 maskLen = MVT::i16;
2612 break;
2613 case MVT::v32i1:
2614 maskLen = MVT::i32;
2615 break;
2616 default:
2617 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2617)
;
2618 }
2619
2620 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2621 }
2622 return DAG.getBitcast(ValVT, ValReturned);
2623}
2624
2625/// Lower the result values of a call into the
2626/// appropriate copies out of appropriate physical registers.
2627///
2628SDValue X86TargetLowering::LowerCallResult(
2629 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2630 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2631 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2632 uint32_t *RegMask) const {
2633
2634 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2635 // Assign locations to each value returned by this call.
2636 SmallVector<CCValAssign, 16> RVLocs;
2637 bool Is64Bit = Subtarget.is64Bit();
2638 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2639 *DAG.getContext());
2640 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2641
2642 // Copy all of the result registers out of their specified physreg.
2643 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2644 ++I, ++InsIndex) {
2645 CCValAssign &VA = RVLocs[I];
2646 EVT CopyVT = VA.getLocVT();
2647
2648 // In some calling conventions we need to remove the used registers
2649 // from the register mask.
2650 if (RegMask) {
2651 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2652 SubRegs.isValid(); ++SubRegs)
2653 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2654 }
2655
2656 // If this is x86-64, and we disabled SSE, we can't return FP values
2657 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2658 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2659 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2660 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2661 }
2662
2663 // If we prefer to use the value in xmm registers, copy it out as f80 and
2664 // use a truncate to move it from fp stack reg to xmm reg.
2665 bool RoundAfterCopy = false;
2666 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2667 isScalarFPTypeInSSEReg(VA.getValVT())) {
2668 if (!Subtarget.hasX87())
2669 report_fatal_error("X87 register return with X87 disabled");
2670 CopyVT = MVT::f80;
2671 RoundAfterCopy = (CopyVT != VA.getLocVT());
2672 }
2673
2674 SDValue Val;
2675 if (VA.needsCustom()) {
2676 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2677, __PRETTY_FUNCTION__))
2677 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2677, __PRETTY_FUNCTION__))
;
2678 Val =
2679 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2680 } else {
2681 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2682 .getValue(1);
2683 Val = Chain.getValue(0);
2684 InFlag = Chain.getValue(2);
2685 }
2686
2687 if (RoundAfterCopy)
2688 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2689 // This truncation won't change the value.
2690 DAG.getIntPtrConstant(1, dl));
2691
2692 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2693 if (VA.getValVT().isVector() &&
2694 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2695 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2696 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2697 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2698 } else
2699 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2700 }
2701
2702 InVals.push_back(Val);
2703 }
2704
2705 return Chain;
2706}
2707
2708//===----------------------------------------------------------------------===//
2709// C & StdCall & Fast Calling Convention implementation
2710//===----------------------------------------------------------------------===//
2711// StdCall calling convention seems to be standard for many Windows' API
2712// routines and around. It differs from C calling convention just a little:
2713// callee should clean up the stack, not caller. Symbols should be also
2714// decorated in some fancy way :) It doesn't support any vector arguments.
2715// For info on fast calling convention see Fast Calling Convention (tail call)
2716// implementation LowerX86_32FastCCCallTo.
2717
/// Result of classifying a call or function by the sret marker on its first
/// argument (see callIsStructReturn / argsAreStructReturn).
enum StructReturnType {
  /// There are no arguments, or the first argument is not marked sret.
  NotStructReturn,
  /// The first argument is sret and is either marked inreg or the target is
  /// an MCU.
  RegStructReturn,
  /// The first argument is sret and neither of the register conditions holds.
  StackStructReturn
};
2725static StructReturnType
2726callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
2727 if (Outs.empty())
2728 return NotStructReturn;
2729
2730 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2731 if (!Flags.isSRet())
2732 return NotStructReturn;
2733 if (Flags.isInReg() || IsMCU)
2734 return RegStructReturn;
2735 return StackStructReturn;
2736}
2737
2738/// Determines whether a function uses struct return semantics.
2739static StructReturnType
2740argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
2741 if (Ins.empty())
2742 return NotStructReturn;
2743
2744 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2745 if (!Flags.isSRet())
2746 return NotStructReturn;
2747 if (Flags.isInReg() || IsMCU)
2748 return RegStructReturn;
2749 return StackStructReturn;
2750}
2751
2752/// Make a copy of an aggregate at address specified by "Src" to address
2753/// "Dst" with size and alignment information specified by the specific
2754/// parameter attribute. The copy will be passed as a byval function parameter.
2755static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2756 SDValue Chain, ISD::ArgFlagsTy Flags,
2757 SelectionDAG &DAG, const SDLoc &dl) {
2758 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2759
2760 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2761 /*isVolatile*/false, /*AlwaysInline=*/true,
2762 /*isTailCall*/false,
2763 MachinePointerInfo(), MachinePointerInfo());
2764}
2765
2766/// Return true if the calling convention is one that we can guarantee TCO for.
2767static bool canGuaranteeTCO(CallingConv::ID CC) {
2768 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2769 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2770 CC == CallingConv::HHVM);
2771}
2772
2773/// Return true if we might ever do TCO for calls with this calling convention.
2774static bool mayTailCallThisCC(CallingConv::ID CC) {
2775 switch (CC) {
2776 // C calling conventions:
2777 case CallingConv::C:
2778 case CallingConv::Win64:
2779 case CallingConv::X86_64_SysV:
2780 // Callee pop conventions:
2781 case CallingConv::X86_ThisCall:
2782 case CallingConv::X86_StdCall:
2783 case CallingConv::X86_VectorCall:
2784 case CallingConv::X86_FastCall:
2785 return true;
2786 default:
2787 return canGuaranteeTCO(CC);
2788 }
2789}
2790
2791/// Return true if the function is being made into a tailcall target by
2792/// changing its ABI.
2793static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2794 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2795}
2796
2797bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2798 auto Attr =
2799 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2800 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2801 return false;
2802
2803 ImmutableCallSite CS(CI);
2804 CallingConv::ID CalleeCC = CS.getCallingConv();
2805 if (!mayTailCallThisCC(CalleeCC))
2806 return false;
2807
2808 return true;
2809}
2810
2811SDValue
2812X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2813 const SmallVectorImpl<ISD::InputArg> &Ins,
2814 const SDLoc &dl, SelectionDAG &DAG,
2815 const CCValAssign &VA,
2816 MachineFrameInfo &MFI, unsigned i) const {
2817 // Create the nodes corresponding to a load from this parameter slot.
2818 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2819 bool AlwaysUseMutable = shouldGuaranteeTCO(
2820 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2821 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2822 EVT ValVT;
2823 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2824
2825 // If value is passed by pointer we have address passed instead of the value
2826 // itself. No need to extend if the mask value and location share the same
2827 // absolute size.
2828 bool ExtendedInMem =
2829 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2830 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2831
2832 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2833 ValVT = VA.getLocVT();
2834 else
2835 ValVT = VA.getValVT();
2836
2837 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2838 // taken by a return address.
2839 int Offset = 0;
2840 if (CallConv == CallingConv::X86_INTR) {
2841 // X86 interrupts may take one or two arguments.
2842 // On the stack there will be no return address as in regular call.
2843 // Offset of last argument need to be set to -4/-8 bytes.
2844 // Where offset of the first argument out of two, should be set to 0 bytes.
2845 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2846 if (Subtarget.is64Bit() && Ins.size() == 2) {
2847 // The stack pointer needs to be realigned for 64 bit handlers with error
2848 // code, so the argument offset changes by 8 bytes.
2849 Offset += 8;
2850 }
2851 }
2852
2853 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2854 // changed with more analysis.
2855 // In case of tail call optimization mark all arguments mutable. Since they
2856 // could be overwritten by lowering of arguments in case of a tail call.
2857 if (Flags.isByVal()) {
2858 unsigned Bytes = Flags.getByValSize();
2859 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2860
2861 // FIXME: For now, all byval parameter objects are marked as aliasing. This
2862 // can be improved with deeper analysis.
2863 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
2864 /*isAliased=*/true);
2865 // Adjust SP offset of interrupt parameter.
2866 if (CallConv == CallingConv::X86_INTR) {
2867 MFI.setObjectOffset(FI, Offset);
2868 }
2869 return DAG.getFrameIndex(FI, PtrVT);
2870 }
2871
2872 // This is an argument in memory. We might be able to perform copy elision.
2873 if (Flags.isCopyElisionCandidate()) {
2874 EVT ArgVT = Ins[i].ArgVT;
2875 SDValue PartAddr;
2876 if (Ins[i].PartOffset == 0) {
2877 // If this is a one-part value or the first part of a multi-part value,
2878 // create a stack object for the entire argument value type and return a
2879 // load from our portion of it. This assumes that if the first part of an
2880 // argument is in memory, the rest will also be in memory.
2881 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
2882 /*Immutable=*/false);
2883 PartAddr = DAG.getFrameIndex(FI, PtrVT);
2884 return DAG.getLoad(
2885 ValVT, dl, Chain, PartAddr,
2886 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2887 } else {
2888 // This is not the first piece of an argument in memory. See if there is
2889 // already a fixed stack object including this offset. If so, assume it
2890 // was created by the PartOffset == 0 branch above and create a load from
2891 // the appropriate offset into it.
2892 int64_t PartBegin = VA.getLocMemOffset();
2893 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
2894 int FI = MFI.getObjectIndexBegin();
2895 for (; MFI.isFixedObjectIndex(FI); ++FI) {
2896 int64_t ObjBegin = MFI.getObjectOffset(FI);
2897 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
2898 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
2899 break;
2900 }
2901 if (MFI.isFixedObjectIndex(FI)) {
2902 SDValue Addr =
2903 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
2904 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
2905 return DAG.getLoad(
2906 ValVT, dl, Chain, Addr,
2907 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
2908 Ins[i].PartOffset));
2909 }
2910 }
2911 }
2912
2913 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2914 VA.getLocMemOffset(), isImmutable);
2915
2916 // Set SExt or ZExt flag.
2917 if (VA.getLocInfo() == CCValAssign::ZExt) {
2918 MFI.setObjectZExt(FI, true);
2919 } else if (VA.getLocInfo() == CCValAssign::SExt) {
2920 MFI.setObjectSExt(FI, true);
2921 }
2922
2923 // Adjust SP offset of interrupt parameter.
2924 if (CallConv == CallingConv::X86_INTR) {
2925 MFI.setObjectOffset(FI, Offset);
2926 }
2927
2928 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2929 SDValue Val = DAG.getLoad(
2930 ValVT, dl, Chain, FIN,
2931 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2932 return ExtendedInMem
2933 ? (VA.getValVT().isVector()
2934 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
2935 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
2936 : Val;
2937}
2938
2939// FIXME: Get this from tablegen.
2940static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2941 const X86Subtarget &Subtarget) {
2942 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2942, __PRETTY_FUNCTION__))
;
2943
2944 if (Subtarget.isCallingConvWin64(CallConv)) {
2945 static const MCPhysReg GPR64ArgRegsWin64[] = {
2946 X86::RCX, X86::RDX, X86::R8, X86::R9
2947 };
2948 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2949 }
2950
2951 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2952 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2953 };
2954 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2955}
2956
2957// FIXME: Get this from tablegen.
2958static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2959 CallingConv::ID CallConv,
2960 const X86Subtarget &Subtarget) {
2961 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2961, __PRETTY_FUNCTION__))
;
2962 if (Subtarget.isCallingConvWin64(CallConv)) {
2963 // The XMM registers which might contain var arg parameters are shadowed
2964 // in their paired GPR. So we only need to save the GPR to their home
2965 // slots.
2966 // TODO: __vectorcall will change this.
2967 return None;
2968 }
2969
2970 const Function &F = MF.getFunction();
2971 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
2972 bool isSoftFloat = Subtarget.useSoftFloat();
2973 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2974, __PRETTY_FUNCTION__))
2974 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 2974, __PRETTY_FUNCTION__))
;
2975 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
2976 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2977 // registers.
2978 return None;
2979
2980 static const MCPhysReg XMMArgRegs64Bit[] = {
2981 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2982 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2983 };
2984 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2985}
2986
2987#ifndef NDEBUG
2988static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
2989 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
2990 [](const CCValAssign &A, const CCValAssign &B) -> bool {
2991 return A.getValNo() < B.getValNo();
2992 });
2993}
2994#endif
2995
2996SDValue X86TargetLowering::LowerFormalArguments(
2997 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2998 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2999 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3000 MachineFunction &MF = DAG.getMachineFunction();
3001 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3002 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3003
3004 const Function &F = MF.getFunction();
3005 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3006 F.getName() == "main")
3007 FuncInfo->setForceFramePointer(true);
3008
3009 MachineFrameInfo &MFI = MF.getFrameInfo();
3010 bool Is64Bit = Subtarget.is64Bit();
3011 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3012
3013 assert(((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3015, __PRETTY_FUNCTION__))
3014 !(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3015, __PRETTY_FUNCTION__))
3015 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3015, __PRETTY_FUNCTION__))
;
3016
3017 if (CallConv == CallingConv::X86_INTR) {
3018 bool isLegal = Ins.size() == 1 ||
3019 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
3020 (!Is64Bit && Ins[1].VT == MVT::i32)));
3021 if (!isLegal)
3022 report_fatal_error("X86 interrupts may take one or two arguments");
3023 }
3024
3025 // Assign locations to all of the incoming arguments.
3026 SmallVector<CCValAssign, 16> ArgLocs;
3027 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3028
3029 // Allocate shadow area for Win64.
3030 if (IsWin64)
3031 CCInfo.AllocateStack(32, 8);
3032
3033 CCInfo.AnalyzeArguments(Ins, CC_X86);
3034
3035 // In vectorcall calling convention a second pass is required for the HVA
3036 // types.
3037 if (CallingConv::X86_VectorCall == CallConv) {
3038 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3039 }
3040
3041 // The next loop assumes that the locations are in the same order of the
3042 // input arguments.
3043 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3044, __PRETTY_FUNCTION__))
3044 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3044, __PRETTY_FUNCTION__))
;
3045
3046 SDValue ArgValue;
3047 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3048 ++I, ++InsIndex) {
3049 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3049, __PRETTY_FUNCTION__))
;
3050 CCValAssign &VA = ArgLocs[I];
3051
3052 if (VA.isRegLoc()) {
3053 EVT RegVT = VA.getLocVT();
3054 if (VA.needsCustom()) {
3055 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3057, __PRETTY_FUNCTION__))
3056 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3057, __PRETTY_FUNCTION__))
3057 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3057, __PRETTY_FUNCTION__))
;
3058
3059 // v64i1 values, in regcall calling convention, that are
3060 // compiled to 32 bit arch, are split up into two registers.
3061 ArgValue =
3062 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3063 } else {
3064 const TargetRegisterClass *RC;
3065 if (RegVT == MVT::i8)
3066 RC = &X86::GR8RegClass;
3067 else if (RegVT == MVT::i16)
3068 RC = &X86::GR16RegClass;
3069 else if (RegVT == MVT::i32)
3070 RC = &X86::GR32RegClass;
3071 else if (Is64Bit && RegVT == MVT::i64)
3072 RC = &X86::GR64RegClass;
3073 else if (RegVT == MVT::f32)
3074 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3075 else if (RegVT == MVT::f64)
3076 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3077 else if (RegVT == MVT::f80)
3078 RC = &X86::RFP80RegClass;
3079 else if (RegVT == MVT::f128)
3080 RC = &X86::VR128RegClass;
3081 else if (RegVT.is512BitVector())
3082 RC = &X86::VR512RegClass;
3083 else if (RegVT.is256BitVector())
3084 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3085 else if (RegVT.is128BitVector())
3086 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3087 else if (RegVT == MVT::x86mmx)
3088 RC = &X86::VR64RegClass;
3089 else if (RegVT == MVT::v1i1)
3090 RC = &X86::VK1RegClass;
3091 else if (RegVT == MVT::v8i1)
3092 RC = &X86::VK8RegClass;
3093 else if (RegVT == MVT::v16i1)
3094 RC = &X86::VK16RegClass;
3095 else if (RegVT == MVT::v32i1)
3096 RC = &X86::VK32RegClass;
3097 else if (RegVT == MVT::v64i1)
3098 RC = &X86::VK64RegClass;
3099 else
3100 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3100)
;
3101
3102 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3103 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3104 }
3105
3106 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3107 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3108 // right size.
3109 if (VA.getLocInfo() == CCValAssign::SExt)
3110 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3111 DAG.getValueType(VA.getValVT()));
3112 else if (VA.getLocInfo() == CCValAssign::ZExt)
3113 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3114 DAG.getValueType(VA.getValVT()));
3115 else if (VA.getLocInfo() == CCValAssign::BCvt)
3116 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3117
3118 if (VA.isExtInLoc()) {
3119 // Handle MMX values passed in XMM regs.
3120 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3121 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3122 else if (VA.getValVT().isVector() &&
3123 VA.getValVT().getScalarType() == MVT::i1 &&
3124 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3125 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3126 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3127 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3128 } else
3129 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3130 }
3131 } else {
3132 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3132, __PRETTY_FUNCTION__))
;
3133 ArgValue =
3134 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3135 }
3136
3137 // If value is passed via pointer - do a load.
3138 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3139 ArgValue =
3140 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3141
3142 InVals.push_back(ArgValue);
3143 }
3144
3145 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3146 // Swift calling convention does not require we copy the sret argument
3147 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3148 if (CallConv == CallingConv::Swift)
3149 continue;
3150
3151 // All x86 ABIs require that for returning structs by value we copy the
3152 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3153 // the argument into a virtual register so that we can access it from the
3154 // return points.
3155 if (Ins[I].Flags.isSRet()) {
3156 unsigned Reg = FuncInfo->getSRetReturnReg();
3157 if (!Reg) {
3158 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3159 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3160 FuncInfo->setSRetReturnReg(Reg);
3161 }
3162 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3163 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3164 break;
3165 }
3166 }
3167
3168 unsigned StackSize = CCInfo.getNextStackOffset();
3169 // Align stack specially for tail calls.
3170 if (shouldGuaranteeTCO(CallConv,
3171 MF.getTarget().Options.GuaranteedTailCallOpt))
3172 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3173
3174 // If the function takes variable number of arguments, make a frame index for
3175 // the start of the first vararg value... for expansion of llvm.va_start. We
3176 // can skip this if there are no va_start calls.
3177 if (MFI.hasVAStart() &&
3178 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3179 CallConv != CallingConv::X86_ThisCall))) {
3180 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3181 }
3182
3183 // Figure out if XMM registers are in use.
3184 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3186, __PRETTY_FUNCTION__))
3185 F.hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3186, __PRETTY_FUNCTION__))
3186 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3186, __PRETTY_FUNCTION__))
;
3187
3188 // 64-bit calling conventions support varargs and register parameters, so we
3189 // have to do extra work to spill them in the prologue.
3190 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3191 // Find the first unallocated argument registers.
3192 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3193 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3194 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3195 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3196 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3197, __PRETTY_FUNCTION__))
3197 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3197, __PRETTY_FUNCTION__))
;
3198
3199 // Gather all the live in physical registers.
3200 SmallVector<SDValue, 6> LiveGPRs;
3201 SmallVector<SDValue, 8> LiveXMMRegs;
3202 SDValue ALVal;
3203 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3204 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3205 LiveGPRs.push_back(
3206 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3207 }
3208 if (!ArgXMMs.empty()) {
3209 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3210 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3211 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3212 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3213 LiveXMMRegs.push_back(
3214 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3215 }
3216 }
3217
3218 if (IsWin64) {
3219 // Get to the caller-allocated home save location. Add 8 to account
3220 // for the return address.
3221 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3222 FuncInfo->setRegSaveFrameIndex(
3223 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3224 // Fixup to set vararg frame on shadow area (4 x i64).
3225 if (NumIntRegs < 4)
3226 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3227 } else {
3228 // For X86-64, if there are vararg parameters that are passed via
3229 // registers, then we must store them to their spots on the stack so
3230 // they may be loaded by dereferencing the result of va_next.
3231 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3232 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3233 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3234 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3235 }
3236
3237 // Store the integer parameter registers.
3238 SmallVector<SDValue, 8> MemOps;
3239 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3240 getPointerTy(DAG.getDataLayout()));
3241 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3242 for (SDValue Val : LiveGPRs) {
3243 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3244 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3245 SDValue Store =
3246 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3247 MachinePointerInfo::getFixedStack(
3248 DAG.getMachineFunction(),
3249 FuncInfo->getRegSaveFrameIndex(), Offset));
3250 MemOps.push_back(Store);
3251 Offset += 8;
3252 }
3253
3254 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3255 // Now store the XMM (fp + vector) parameter registers.
3256 SmallVector<SDValue, 12> SaveXMMOps;
3257 SaveXMMOps.push_back(Chain);
3258 SaveXMMOps.push_back(ALVal);
3259 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3260 FuncInfo->getRegSaveFrameIndex(), dl));
3261 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3262 FuncInfo->getVarArgsFPOffset(), dl));
3263 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3264 LiveXMMRegs.end());
3265 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3266 MVT::Other, SaveXMMOps));
3267 }
3268
3269 if (!MemOps.empty())
3270 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3271 }
3272
3273 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3274 // Find the largest legal vector type.
3275 MVT VecVT = MVT::Other;
3276 // FIXME: Only some x86_32 calling conventions support AVX512.
3277 if (Subtarget.hasAVX512() &&
3278 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3279 CallConv == CallingConv::Intel_OCL_BI)))
3280 VecVT = MVT::v16f32;
3281 else if (Subtarget.hasAVX())
3282 VecVT = MVT::v8f32;
3283 else if (Subtarget.hasSSE2())
3284 VecVT = MVT::v4f32;
3285
3286 // We forward some GPRs and some vector types.
3287 SmallVector<MVT, 2> RegParmTypes;
3288 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3289 RegParmTypes.push_back(IntVT);
3290 if (VecVT != MVT::Other)
3291 RegParmTypes.push_back(VecVT);
3292
3293 // Compute the set of forwarded registers. The rest are scratch.
3294 SmallVectorImpl<ForwardedRegister> &Forwards =
3295 FuncInfo->getForwardedMustTailRegParms();
3296 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3297
3298 // Conservatively forward AL on x86_64, since it might be used for varargs.
3299 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3300 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3301 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3302 }
3303
3304 // Copy all forwards from physical to virtual registers.
3305 for (ForwardedRegister &F : Forwards) {
3306 // FIXME: Can we use a less constrained schedule?
3307 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3308 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
3309 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
3310 }
3311 }
3312
3313 // Some CCs need callee pop.
3314 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3315 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3316 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3317 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3318 // X86 interrupts must pop the error code (and the alignment padding) if
3319 // present.
3320 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3321 } else {
3322 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3323 // If this is an sret function, the return should pop the hidden pointer.
3324 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3325 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3326 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3327 FuncInfo->setBytesToPopOnReturn(4);
3328 }
3329
3330 if (!Is64Bit) {
3331 // RegSaveFrameIndex is X86-64 only.
3332 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3333 if (CallConv == CallingConv::X86_FastCall ||
3334 CallConv == CallingConv::X86_ThisCall)
3335 // fastcc functions can't have varargs.
3336 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3337 }
3338
3339 FuncInfo->setArgumentStackSize(StackSize);
3340
3341 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3342 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3343 if (Personality == EHPersonality::CoreCLR) {
3344 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3344, __PRETTY_FUNCTION__))
;
3345 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3346 // that we'd prefer this slot be allocated towards the bottom of the frame
3347 // (i.e. near the stack pointer after allocating the frame). Every
3348 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3349 // offset from the bottom of this and each funclet's frame must be the
3350 // same, so the size of funclets' (mostly empty) frames is dictated by
3351 // how far this slot is from the bottom (since they allocate just enough
3352 // space to accommodate holding this slot at the correct offset).
3353 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3354 EHInfo->PSPSymFrameIdx = PSPSymFI;
3355 }
3356 }
3357
3358 if (CallConv == CallingConv::X86_RegCall ||
3359 F.hasFnAttribute("no_caller_saved_registers")) {
3360 MachineRegisterInfo &MRI = MF.getRegInfo();
3361 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3362 MRI.disableCalleeSavedRegister(Pair.first);
3363 }
3364
3365 return Chain;
3366}
3367
3368SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3369 SDValue Arg, const SDLoc &dl,
3370 SelectionDAG &DAG,
3371 const CCValAssign &VA,
3372 ISD::ArgFlagsTy Flags) const {
3373 unsigned LocMemOffset = VA.getLocMemOffset();
3374 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3375 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3376 StackPtr, PtrOff);
3377 if (Flags.isByVal())
3378 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3379
3380 return DAG.getStore(
3381 Chain, dl, Arg, PtrOff,
3382 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3383}
3384
3385/// Emit a load of return address if tail call
3386/// optimization is performed and it is required.
3387SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3388 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3389 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3390 // Adjust the Return address stack slot.
3391 EVT VT = getPointerTy(DAG.getDataLayout());
3392 OutRetAddr = getReturnAddressFrameIndex(DAG);
3393
3394 // Load the "old" Return address.
3395 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3396 return SDValue(OutRetAddr.getNode(), 1);
3397}
3398
3399/// Emit a store of the return address if tail call
3400/// optimization is performed and it is required (FPDiff!=0).
3401static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3402 SDValue Chain, SDValue RetAddrFrIdx,
3403 EVT PtrVT, unsigned SlotSize,
3404 int FPDiff, const SDLoc &dl) {
3405 // Store the return address to the appropriate stack slot.
3406 if (!FPDiff) return Chain;
3407 // Calculate the new stack slot for the return address.
3408 int NewReturnAddrFI =
3409 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3410 false);
3411 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3412 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3413 MachinePointerInfo::getFixedStack(
3414 DAG.getMachineFunction(), NewReturnAddrFI));
3415 return Chain;
3416}
3417
3418/// Returns a vector_shuffle mask for an movs{s|d}, movd
3419/// operation of specified width.
3420static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3421 SDValue V2) {
3422 unsigned NumElems = VT.getVectorNumElements();
3423 SmallVector<int, 8> Mask;
3424 Mask.push_back(NumElems);
3425 for (unsigned i = 1; i != NumElems; ++i)
3426 Mask.push_back(i);
3427 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3428}
3429
/// Lower the outgoing call described by \p CLI into SelectionDAG nodes.
///
/// Visible steps, in order:
///   1. Decide whether the call can be a tail call / sibcall (musttail forces
///      a tail call); X86_INTR callees are rejected with a fatal error.
///   2. Assign locations to the outgoing operands with CC_X86 and compute the
///      number of stack bytes to push and pop.
///   3. Emit CALLSEQ_START (skipped for sibcalls), promote each argument as
///      its location requires, and collect register copies and stack stores.
///   4. For non-sibcall tail calls, store stack arguments into their final
///      fixed slots and relocate the return address when FPDiff is non-zero.
///   5. Rewrite the callee operand, build the operand list (including the
///      register mask) and emit TC_RETURN, NT_CALL or CALL.
///   6. For non-tail calls, emit CALLSEQ_END (skipped for sibcalls) and let
///      LowerCallResult copy the results into \p InVals.
///
/// \returns the TC_RETURN node for tail calls, otherwise whatever
/// LowerCallResult returns.
3430SDValue
3431X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3432 SmallVectorImpl<SDValue> &InVals) const {
3433 SelectionDAG &DAG = CLI.DAG;
3434 SDLoc &dl = CLI.DL;
3435 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3436 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3437 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3438 SDValue Chain = CLI.Chain;
3439 SDValue Callee = CLI.Callee;
3440 CallingConv::ID CallConv = CLI.CallConv;
// isTailCall is a reference: the decisions made below are written back to CLI.
3441 bool &isTailCall = CLI.IsTailCall;
3442 bool isVarArg = CLI.IsVarArg;
3443
3444 MachineFunction &MF = DAG.getMachineFunction();
3445 bool Is64Bit = Subtarget.is64Bit();
3446 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3447 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3448 bool IsSibcall = false;
3449 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3450 auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
3451 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3452 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
// HasNCSR: "no_caller_saved_registers" is present on the call instruction or
// on the called function.
3453 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3454 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3455 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
3456 bool HasNoCfCheck =
3457 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3458 const Module *M = MF.getMMI().getModule();
3459 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3460
3461 if (CallConv == CallingConv::X86_INTR)
3462 report_fatal_error("X86 interrupts may not be called directly");
3463
3464 if (Attr.getValueAsString() == "true")
3465 isTailCall = false;
3466
3467 if (Subtarget.isPICStyleGOT() &&
3468 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3469 // If we are using a GOT, disable tail calls to external symbols with
3470 // default visibility. Tail calling such a symbol requires using a GOT
3471 // relocation, which forces early binding of the symbol. This breaks code
3472 // that requires lazy function symbol resolution. Using musttail or
3473 // GuaranteedTailCallOpt will override this.
3474 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3475 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3476 G->getGlobal()->hasDefaultVisibility()))
3477 isTailCall = false;
3478 }
3479
3480 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3481 if (IsMustTail) {
3482 // Force this to be a tail call. The verifier rules are enough to ensure
3483 // that we can lower this successfully without moving the return address
3484 // around.
3485 isTailCall = true;
3486 } else if (isTailCall) {
3487 // Check if it's really possible to do a tail call.
3488 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3489 isVarArg, SR != NotStructReturn,
3490 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3491 Outs, OutVals, Ins, DAG);
3492
3493 // Sibcalls are automatically detected tailcalls which do not require
3494 // ABI changes.
3495 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3496 IsSibcall = true;
3497
3498 if (isTailCall)
3499 ++NumTailCalls;
3500 }
3501
3502 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3503, __PRETTY_FUNCTION__))
3503 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3503, __PRETTY_FUNCTION__))
;
3504
3505 // Analyze operands of the call, assigning locations to each operand.
3506 SmallVector<CCValAssign, 16> ArgLocs;
3507 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3508
3509 // Allocate shadow area for Win64.
3510 if (IsWin64)
3511 CCInfo.AllocateStack(32, 8);
3512
3513 CCInfo.AnalyzeArguments(Outs, CC_X86);
3514
3515 // In vectorcall calling convention a second pass is required for the HVA
3516 // types.
3517 if (CallingConv::X86_VectorCall == CallConv) {
3518 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3519 }
3520
3521 // Get a count of how many bytes are to be pushed on the stack.
3522 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3523 if (IsSibcall)
3524 // This is a sibcall. The memory operands are available in caller's
3525 // own caller's stack.
3526 NumBytes = 0;
3527 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3528 canGuaranteeTCO(CallConv))
3529 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3530
// FPDiff stays 0 for sibcalls, musttail calls and ordinary calls.
3531 int FPDiff = 0;
3532 if (isTailCall && !IsSibcall && !IsMustTail) {
3533 // Lower arguments at fp - stackoffset + fpdiff.
3534 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3535
3536 FPDiff = NumBytesCallerPushed - NumBytes;
3537
3538 // Set the delta of movement of the returnaddr stackslot.
3539 // The recorded delta is only replaced when FPDiff is smaller than it.
3540 if (FPDiff < X86Info->getTCReturnAddrDelta())
3541 X86Info->setTCReturnAddrDelta(FPDiff);
3542 }
3543
3544 unsigned NumBytesToPush = NumBytes;
3545 unsigned NumBytesToPop = NumBytes;
3546
3547 // If we have an inalloca argument, all stack space has already been allocated
3548 // for us and is right at the top of the stack. We don't support multiple
3549 // arguments passed in memory when using inalloca.
3550 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3551 NumBytesToPush = 0;
3552 if (!ArgLocs.back().isMemLoc())
3553 report_fatal_error("cannot use inalloca attribute on a register "
3554 "parameter");
3555 if (ArgLocs.back().getLocMemOffset() != 0)
3556 report_fatal_error("any parameter with the inalloca attribute must be "
3557 "the only memory argument");
3558 }
3559
3560 if (!IsSibcall)
3561 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3562 NumBytes - NumBytesToPush, dl);
3563
3564 SDValue RetAddrFrIdx;
3565 // Load return address for tail calls.
3566 if (isTailCall && FPDiff)
3567 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3568 Is64Bit, FPDiff, dl);
3569
3570 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3571 SmallVector<SDValue, 8> MemOpChains;
3572 SDValue StackPtr;
3573
3574 // The next loop assumes that the locations are in the same order of the
3575 // input arguments.
3576 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3577, __PRETTY_FUNCTION__))
3577 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3577, __PRETTY_FUNCTION__))
;
3578
3579 // Walk the register/memloc assignments, inserting copies/loads. In the case
3580 // of tail call optimization arguments are handled later.
3581 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// I indexes ArgLocs and OutIndex indexes Outs/OutVals; they diverge when a
// custom location consumes two ArgLocs entries (see the ++I below).
3582 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3583 ++I, ++OutIndex) {
3584 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3584, __PRETTY_FUNCTION__))
;
3585 // Skip inalloca arguments, they have already been written.
3586 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3587 if (Flags.isInAlloca())
3588 continue;
3589
3590 CCValAssign &VA = ArgLocs[I];
3591 EVT RegVT = VA.getLocVT();
3592 SDValue Arg = OutVals[OutIndex];
3593 bool isByVal = Flags.isByVal();
3594
3595 // Promote the value if needed.
3596 switch (VA.getLocInfo()) {
3597 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3597)
;
3598 case CCValAssign::Full: break;
3599 case CCValAssign::SExt:
3600 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3601 break;
3602 case CCValAssign::ZExt:
3603 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3604 break;
3605 case CCValAssign::AExt:
3606 if (Arg.getValueType().isVector() &&
3607 Arg.getValueType().getVectorElementType() == MVT::i1)
3608 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3609 else if (RegVT.is128BitVector()) {
3610 // Special case: passing MMX values in XMM registers.
3611 Arg = DAG.getBitcast(MVT::i64, Arg);
3612 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3613 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3614 } else
3615 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3616 break;
3617 case CCValAssign::BCvt:
3618 Arg = DAG.getBitcast(RegVT, Arg);
3619 break;
3620 case CCValAssign::Indirect: {
3621 if (isByVal) {
3622 // Memcpy the argument to a temporary stack slot to prevent
3623 // the caller from seeing any modifications the callee may make
3624 // as guaranteed by the `byval` attribute.
3625 int FrameIdx = MF.getFrameInfo().CreateStackObject(
3626 Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
3627 false);
3628 SDValue StackSlot =
3629 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
3630 Chain =
3631 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
3632 // From now on treat this as a regular pointer
3633 Arg = StackSlot;
3634 isByVal = false;
3635 } else {
3636 // Store the argument.
3637 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3638 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3639 Chain = DAG.getStore(
3640 Chain, dl, Arg, SpillSlot,
3641 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3642 Arg = SpillSlot;
3643 }
3644 break;
3645 }
3646 }
3647
3648 if (VA.needsCustom()) {
3649 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3650, __PRETTY_FUNCTION__))
3650 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3650, __PRETTY_FUNCTION__))
;
3651 // Split v64i1 value into two registers
// ArgLocs[++I] consumes the next location as the second half of this value.
3652 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3653 Subtarget);
3654 } else if (VA.isRegLoc()) {
3655 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3656 if (isVarArg && IsWin64) {
3657 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3658 // shadow reg if callee is a varargs function.
3659 unsigned ShadowReg = 0;
3660 switch (VA.getLocReg()) {
3661 case X86::XMM0: ShadowReg = X86::RCX; break;
3662 case X86::XMM1: ShadowReg = X86::RDX; break;
3663 case X86::XMM2: ShadowReg = X86::R8; break;
3664 case X86::XMM3: ShadowReg = X86::R9; break;
3665 }
3666 if (ShadowReg)
3667 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3668 }
3669 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3670 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3670, __PRETTY_FUNCTION__))
;
// The stack pointer copy is created lazily, on the first memory argument.
3671 if (!StackPtr.getNode())
3672 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3673 getPointerTy(DAG.getDataLayout()));
3674 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3675 dl, DAG, VA, Flags));
3676 }
3677 }
3678
// Join all argument stores into a single chain.
3679 if (!MemOpChains.empty())
3680 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3681
3682 if (Subtarget.isPICStyleGOT()) {
3683 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3684 // GOT pointer.
3685 if (!isTailCall) {
3686 RegsToPass.push_back(std::make_pair(
3687 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3688 getPointerTy(DAG.getDataLayout()))));
3689 } else {
3690 // If we are tail calling and generating PIC/GOT style code load the
3691 // address of the callee into ECX. The value in ecx is used as target of
3692 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3693 // for tail calls on PIC/GOT architectures. Normally we would just put the
3694 // address of GOT into ebx and then call target@PLT. But for tail calls
3695 // ebx would be restored (since ebx is callee saved) before jumping to the
3696 // target@PLT.
3697
3698 // Note: The actual moving to ECX is done further down.
3699 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3700 if (G && !G->getGlobal()->hasLocalLinkage() &&
3701 G->getGlobal()->hasDefaultVisibility())
3702 Callee = LowerGlobalAddress(Callee, DAG);
3703 else if (isa<ExternalSymbolSDNode>(Callee))
3704 Callee = LowerExternalSymbol(Callee, DAG);
3705 }
3706 }
3707
3708 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3709 // From AMD64 ABI document:
3710 // For calls that may call functions that use varargs or stdargs
3711 // (prototype-less calls or calls to functions containing ellipsis (...) in
3712 // the declaration) %al is used as hidden argument to specify the number
3713 // of SSE registers used. The contents of %al do not need to match exactly
3714 // the number of registers, but must be an upper bound on the number of SSE
3715 // registers used and is in the range 0 - 8 inclusive.
3716
3717 // Count the number of XMM registers allocated.
3718 static const MCPhysReg XMMArgRegs[] = {
3719 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3720 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3721 };
3722 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3723 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3724, __PRETTY_FUNCTION__))
3724 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3724, __PRETTY_FUNCTION__))
;
3725
3726 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3727 DAG.getConstant(NumXMMRegs, dl,
3728 MVT::i8)));
3729 }
3730
// Variadic musttail: pass along every register recorded in the function info's
// forwarded-musttail list, copying from its virtual to its physical register.
3731 if (isVarArg && IsMustTail) {
3732 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3733 for (const auto &F : Forwards) {
3734 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3735 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3736 }
3737 }
3738
3739 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3740 // don't need this because the eligibility check rejects calls that require
3741 // shuffling arguments passed in memory.
3742 if (!IsSibcall && isTailCall) {
3743 // Force all the incoming stack arguments to be loaded from the stack
3744 // before any new outgoing arguments are stored to the stack, because the
3745 // outgoing stack slots may alias the incoming argument stack slots, and
3746 // the alias isn't otherwise explicit. This is slightly more conservative
3747 // than necessary, because it means that each store effectively depends
3748 // on every argument instead of just those arguments it would clobber.
3749 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3750
3751 SmallVector<SDValue, 8> MemOpChains2;
3752 SDValue FIN;
3753 int FI = 0;
3754 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3755 ++I, ++OutsIndex) {
3756 CCValAssign &VA = ArgLocs[I];
3757
3758 if (VA.isRegLoc()) {
3759 if (VA.needsCustom()) {
3760 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3761, __PRETTY_FUNCTION__))
3761 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3761, __PRETTY_FUNCTION__))
;
3762 // This means that we are in special case where one argument was
3763 // passed through two register locations - Skip the next location
3764 ++I;
3765 }
3766
3767 continue;
3768 }
3769
3770 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3770, __PRETTY_FUNCTION__))
;
3771 SDValue Arg = OutVals[OutsIndex];
3772 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3773 // Skip inalloca arguments. They don't require any work.
3774 if (Flags.isInAlloca())
3775 continue;
3776 // Create frame index.
// The slot offset is the location's memory offset shifted by FPDiff; the
// size is the location type's bit width rounded up to whole bytes.
3777 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3778 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3779 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3780 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3781
3782 if (Flags.isByVal()) {
3783 // Copy relative to framepointer.
3784 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3785 if (!StackPtr.getNode())
3786 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3787 getPointerTy(DAG.getDataLayout()));
3788 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3789 StackPtr, Source);
3790
3791 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3792 ArgChain,
3793 Flags, DAG, dl));
3794 } else {
3795 // Store relative to framepointer.
3796 MemOpChains2.push_back(DAG.getStore(
3797 ArgChain, dl, Arg, FIN,
3798 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3799 }
3800 }
3801
3802 if (!MemOpChains2.empty())
3803 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3804
3805 // Store the return address to the appropriate stack slot.
3806 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3807 getPointerTy(DAG.getDataLayout()),
3808 RegInfo->getSlotSize(), FPDiff, dl);
3809 }
3810
3811 // Build a sequence of copy-to-reg nodes chained together with token chain
3812 // and flag operands which copy the outgoing args into registers.
3813 SDValue InFlag;
3814 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3815 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3816 RegsToPass[i].second, InFlag);
3817 InFlag = Chain.getValue(1);
3818 }
3819
// Rewrite the callee operand. In the large code model the callee is left
// untouched by this chain of branches.
3820 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3821 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3821, __PRETTY_FUNCTION__))
;
3822 // In the 64-bit large code model, we have to make all calls
3823 // through a register, since the call instruction's 32-bit
3824 // pc-relative offset may not be large enough to hold the whole
3825 // address.
3826 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3827 // If the callee is a GlobalAddress node (quite common, every direct call
3828 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3829 // it.
3830 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3831
3832 // We should use extra load for direct calls to dllimported functions in
3833 // non-JIT mode.
3834 const GlobalValue *GV = G->getGlobal();
3835 if (!GV->hasDLLImportStorageClass()) {
3836 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
3837
3838 Callee = DAG.getTargetGlobalAddress(
3839 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3840
3841 if (OpFlags == X86II::MO_GOTPCREL) {
3842 // Add a wrapper.
3843 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3844 getPointerTy(DAG.getDataLayout()), Callee);
3845 // Add extra indirection
3846 Callee = DAG.getLoad(
3847 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3848 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3849 }
3850 }
3851 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3852 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
3853 unsigned char OpFlags =
3854 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
3855
3856 Callee = DAG.getTargetExternalSymbol(
3857 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3858
3859 if (OpFlags == X86II::MO_GOTPCREL) {
3860 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3861 getPointerTy(DAG.getDataLayout()), Callee);
3862 Callee = DAG.getLoad(
3863 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3864 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3865 }
3866 } else if (Subtarget.isTarget64BitILP32() &&
3867 Callee->getValueType(0) == MVT::i32) {
3868 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3869 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3870 }
3871
3872 // Returns a chain & a flag for retval copy to use.
3873 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3874 SmallVector<SDValue, 8> Ops;
3875
// For non-sibcall tail calls the call sequence is closed before the
// TC_RETURN node is built.
3876 if (!IsSibcall && isTailCall) {
3877 Chain = DAG.getCALLSEQ_END(Chain,
3878 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3879 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3880 InFlag = Chain.getValue(1);
3881 }
3882
3883 Ops.push_back(Chain);
3884 Ops.push_back(Callee);
3885
3886 if (isTailCall)
3887 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3888
3889 // Add argument registers to the end of the list so that they are known live
3890 // into the call.
3891 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3892 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3893 RegsToPass[i].second.getValueType()));
3894
3895 // Add a register mask operand representing the call-preserved registers.
3896 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
3897 // set X86_INTR calling convention because it has the same CSR mask
3898 // (same preserved registers).
3899 const uint32_t *Mask = RegInfo->getCallPreservedMask(
3900 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
3901 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 3901, __PRETTY_FUNCTION__))
;
3902
3903 // If this is an invoke in a 32-bit function using a funclet-based
3904 // personality, assume the function clobbers all registers. If an exception
3905 // is thrown, the runtime will not restore CSRs.
3906 // FIXME: Model this more precisely so that we can register allocate across
3907 // the normal edge and spill and fill across the exceptional edge.
3908 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
3909 const Function &CallerFn = MF.getFunction();
3910 EHPersonality Pers =
3911 CallerFn.hasPersonalityFn()
3912 ? classifyEHPersonality(CallerFn.getPersonalityFn())
3913 : EHPersonality::Unknown;
3914 if (isFuncletEHPersonality(Pers))
3915 Mask = RegInfo->getNoPreservedMask();
3916 }
3917
3918 // Define a new register mask from the existing mask.
// RegMask stays null unless the branch below allocates a private copy; it is
// forwarded to LowerCallResult at the end of the function either way.
3919 uint32_t *RegMask = nullptr;
3920
3921 // In some calling conventions we need to remove the used physical registers
3922 // from the reg mask.
3923 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
3924 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3925
3926 // Allocate a new Reg Mask and copy Mask.
3927 RegMask = MF.allocateRegMask();
3928 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
3929 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
3930
3931 // Make sure all sub registers of the argument registers are reset
3932 // in the RegMask.
3933 for (auto const &RegPair : RegsToPass)
3934 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
3935 SubRegs.isValid(); ++SubRegs)
3936 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3937
3938 // Create the RegMask Operand according to our updated mask.
3939 Ops.push_back(DAG.getRegisterMask(RegMask));
3940 } else {
3941 // Create the RegMask Operand according to the static mask.
3942 Ops.push_back(DAG.getRegisterMask(Mask));
3943 }
3944
3945 if (InFlag.getNode())
3946 Ops.push_back(InFlag);
3947
3948 if (isTailCall) {
3949 // We used to do:
3950 //// If this is the first return lowered for this function, add the regs
3951 //// to the liveout set for the function.
3952 // This isn't right, although it's probably harmless on x86; liveouts
3953 // should be computed from returns not tail calls. Consider a void
3954 // function making a tail call to a function returning int.
3955 MF.getFrameInfo().setHasTailCall();
3956 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
3957 }
3958
// NT_CALL is emitted only when the call is marked nocf_check and the module
// carries the "cf-protection-branch" flag.
3959 if (HasNoCfCheck && IsCFProtectionSupported) {
3960 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
3961 } else {
3962 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3963 }
3964 InFlag = Chain.getValue(1);
3965
3966 // Create the CALLSEQ_END node.
3967 unsigned NumBytesForCalleeToPop;
3968 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3969 DAG.getTarget().Options.GuaranteedTailCallOpt))
3970 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3971 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3972 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3973 SR == StackStructReturn)
3974 // If this is a call to a struct-return function, the callee
3975 // pops the hidden struct pointer, so we have to push it back.
3976 // This is common for Darwin/X86, Linux & Mingw32 targets.
3977 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3978 NumBytesForCalleeToPop = 4;
3979 else
3980 NumBytesForCalleeToPop = 0; // Callee pops nothing.
3981
3982 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
3983 // No need to reset the stack after the call if the call doesn't return. To
3984 // make the MI verify, we'll pretend the callee does it for us.
3985 NumBytesForCalleeToPop = NumBytes;
3986 }
3987
3988 // Returns a flag for retval copy to use.
3989 if (!IsSibcall) {
3990 Chain = DAG.getCALLSEQ_END(Chain,
3991 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3992 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
3993 true),
3994 InFlag, dl);
3995 InFlag = Chain.getValue(1);
3996 }
3997
3998 // Handle result values, copying them out of physregs into vregs that we
3999 // return.
4000 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4001 InVals, RegMask);
4002}
4003
4004//===----------------------------------------------------------------------===//
4005// Fast Calling Convention (tail call) implementation
4006//===----------------------------------------------------------------------===//
4007
4008// Like std call, callee cleans arguments, convention except that ECX is
4009// reserved for storing the tail called function address. Only 2 registers are
4010// free for argument passing (inreg). Tail call optimization is performed
4011// provided:
4012// * tailcallopt is enabled
4013// * caller/callee are fastcc
4014// On X86_64 architecture with GOT-style position independent code only local
4015// (within module) calls are supported at the moment.
4016// To keep the stack aligned according to platform abi the function
4017// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4018// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
4019// If a tail called function callee has more arguments than the caller the
4020// caller needs to make sure that there is room to move the RETADDR to. This is
4021// achieved by reserving an area the size of the argument delta right after the
4022// original RETADDR, but before the saved framepointer or the spilled registers
4023// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4024// stack layout:
4025// arg1
4026// arg2
4027// RETADDR
4028// [ new RETADDR
4029// move area ]
4030// (possible EBP)
4031// ESI
4032// EDI
4033// local1 ..
4034
4035/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4036/// requirement.
4037unsigned
4038X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
4039 SelectionDAG& DAG) const {
4040 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4041 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
4042 unsigned StackAlignment = TFI.getStackAlignment();
4043 uint64_t AlignMask = StackAlignment - 1;
4044 int64_t Offset = StackSize;
4045 unsigned SlotSize = RegInfo->getSlotSize();
4046 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
4047 // Number smaller than 12 so just add the difference.
4048 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
4049 } else {
4050 // Mask out lower bits, add stackalignment once plus the 12 bytes.
4051 Offset = ((~AlignMask) & Offset) + StackAlignment +
4052 (StackAlignment-SlotSize);
4053 }
4054 return Offset;
4055}
4056
4057/// Return true if the given stack call argument is already available in the
4058/// same position (relatively) of the caller's incoming argument stack.
4059static
4060bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4061 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4062 const X86InstrInfo *TII, const CCValAssign &VA) {
4063 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4064
4065 for (;;) {
4066 // Look through nodes that don't alter the bits of the incoming value.
4067 unsigned Op = Arg.getOpcode();
4068 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4069 Arg = Arg.getOperand(0);
4070 continue;
4071 }
4072 if (Op == ISD::TRUNCATE) {
4073 const SDValue &TruncInput = Arg.getOperand(0);
4074 if (TruncInput.getOpcode() == ISD::AssertZext &&
4075 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4076 Arg.getValueType()) {
4077 Arg = TruncInput.getOperand(0);
4078 continue;
4079 }
4080 }
4081 break;
4082 }
4083
4084 int FI = INT_MAX2147483647;
4085 if (Arg.getOpcode() == ISD::CopyFromReg) {
4086 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4087 if (!TargetRegisterInfo::isVirtualRegister(VR))
4088 return false;
4089 MachineInstr *Def = MRI->getVRegDef(VR);
4090 if (!Def)
4091 return false;
4092 if (!Flags.isByVal()) {
4093 if (!TII->isLoadFromStackSlot(*Def, FI))
4094 return false;
4095 } else {
4096 unsigned Opcode = Def->getOpcode();
4097 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4098 Opcode == X86::LEA64_32r) &&
4099 Def->getOperand(1).isFI()) {
4100 FI = Def->getOperand(1).getIndex();
4101 Bytes = Flags.getByValSize();
4102 } else
4103 return false;
4104 }
4105 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4106 if (Flags.isByVal())
4107 // ByVal argument is passed in as a pointer but it's now being
4108 // dereferenced. e.g.
4109 // define @foo(%struct.X* %A) {
4110 // tail call @bar(%struct.X* byval %A)
4111 // }
4112 return false;
4113 SDValue Ptr = Ld->getBasePtr();
4114 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4115 if (!FINode)
4116 return false;
4117 FI = FINode->getIndex();
4118 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4119 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4120 FI = FINode->getIndex();
4121 Bytes = Flags.getByValSize();
4122 } else
4123 return false;
4124
4125 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 4125, __PRETTY_FUNCTION__))
;
4126 if (!MFI.isFixedObjectIndex(FI))
4127 return false;
4128
4129 if (Offset != MFI.getObjectOffset(FI))
4130 return false;
4131
4132 // If this is not byval, check that the argument stack object is immutable.
4133 // inalloca and argument copy elision can create mutable argument stack
4134 // objects. Byval objects can be mutated, but a byval call intends to pass the
4135 // mutated memory.
4136 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4137 return false;
4138
4139 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4140 // If the argument location is wider than the argument type, check that any
4141 // extension flags match.
4142 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4143 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4144 return false;
4145 }
4146 }
4147
4148 return Bytes == MFI.getObjectSize(FI);
4149}
4150
4151/// Check whether the call is eligible for tail call optimization. Targets
4152/// that want to do tail call optimization should implement this function.
4153bool X86TargetLowering::IsEligibleForTailCallOptimization(
4154 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4155 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4156 const SmallVectorImpl<ISD::OutputArg> &Outs,
4157 const SmallVectorImpl<SDValue> &OutVals,
4158 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4159 if (!mayTailCallThisCC(CalleeCC))
4160 return false;
4161
4162 // If -tailcallopt is specified, make fastcc functions tail-callable.
4163 MachineFunction &MF = DAG.getMachineFunction();
4164 const Function &CallerF = MF.getFunction();
4165
4166 // If the function return type is x86_fp80 and the callee return type is not,
4167 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4168 // perform a tailcall optimization here.
4169 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4170 return false;
4171
4172 CallingConv::ID CallerCC = CallerF.getCallingConv();
4173 bool CCMatch = CallerCC == CalleeCC;
4174 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4175 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4176
4177 // Win64 functions have extra shadow space for argument homing. Don't do the
4178 // sibcall if the caller and callee have mismatched expectations for this
4179 // space.
4180 if (IsCalleeWin64 != IsCallerWin64)
4181 return false;
4182
4183 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4184 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4185 return true;
4186 return false;
4187 }
4188
4189 // Look for obvious safe cases to perform tail call optimization that do not
4190 // require ABI changes. This is what gcc calls sibcall.
4191
4192 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4193 // emit a special epilogue.
4194 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4195 if (RegInfo->needsStackRealignment(MF))
4196 return false;
4197
4198 // Also avoid sibcall optimization if either caller or callee uses struct
4199 // return semantics.
4200 if (isCalleeStructRet || isCallerStructRet)
4201 return false;
4202
4203 // Do not sibcall optimize vararg calls unless all arguments are passed via
4204 // registers.
4205 LLVMContext &C = *DAG.getContext();
4206 if (isVarArg && !Outs.empty()) {
4207 // Optimizing for varargs on Win64 is unlikely to be safe without
4208 // additional testing.
4209 if (IsCalleeWin64 || IsCallerWin64)
4210 return false;
4211
4212 SmallVector<CCValAssign, 16> ArgLocs;
4213 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4214
4215 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4216 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4217 if (!ArgLocs[i].isRegLoc())
4218 return false;
4219 }
4220
4221 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4222 // stack. Therefore, if it's not used by the call it is not safe to optimize
4223 // this into a sibcall.
4224 bool Unused = false;
4225 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4226 if (!Ins[i].Used) {
4227 Unused = true;
4228 break;
4229 }
4230 }
4231 if (Unused) {
4232 SmallVector<CCValAssign, 16> RVLocs;
4233 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4234 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4235 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4236 CCValAssign &VA = RVLocs[i];
4237 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4238 return false;
4239 }
4240 }
4241
4242 // Check that the call results are passed in the same way.
4243 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4244 RetCC_X86, RetCC_X86))
4245 return false;
4246 // The callee has to preserve all registers the caller needs to preserve.
4247 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4248 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4249 if (!CCMatch) {
4250 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4251 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4252 return false;
4253 }
4254
4255 unsigned StackArgsSize = 0;
4256
4257 // If the callee takes no arguments then go on to check the results of the
4258 // call.
4259 if (!Outs.empty()) {
4260 // Check if stack adjustment is needed. For now, do not do this if any
4261 // argument is passed on the stack.
4262 SmallVector<CCValAssign, 16> ArgLocs;
4263 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4264
4265 // Allocate shadow area for Win64
4266 if (IsCalleeWin64)
4267 CCInfo.AllocateStack(32, 8);
4268
4269 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4270 StackArgsSize = CCInfo.getNextStackOffset();
4271
4272 if (CCInfo.getNextStackOffset()) {
4273 // Check if the arguments are already laid out in the right way as
4274 // the caller's fixed stack objects.
4275 MachineFrameInfo &MFI = MF.getFrameInfo();
4276 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4277 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4278 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4279 CCValAssign &VA = ArgLocs[i];
4280 SDValue Arg = OutVals[i];
4281 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4282 if (VA.getLocInfo() == CCValAssign::Indirect)
4283 return false;
4284 if (!VA.isRegLoc()) {
4285 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4286 MFI, MRI, TII, VA))
4287 return false;
4288 }
4289 }
4290 }
4291
4292 bool PositionIndependent = isPositionIndependent();
4293 // If the tailcall address may be in a register, then make sure it's
4294 // possible to register allocate for it. In 32-bit, the call address can
4295 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4296 // callee-saved registers are restored. These happen to be the same
4297 // registers used to pass 'inreg' arguments so watch out for those.
4298 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4299 !isa<ExternalSymbolSDNode>(Callee)) ||
4300 PositionIndependent)) {
4301 unsigned NumInRegs = 0;
4302 // In PIC we need an extra register to formulate the address computation
4303 // for the callee.
4304 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4305
4306 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4307 CCValAssign &VA = ArgLocs[i];
4308 if (!VA.isRegLoc())
4309 continue;
4310 unsigned Reg = VA.getLocReg();
4311 switch (Reg) {
4312 default: break;
4313 case X86::EAX: case X86::EDX: case X86::ECX:
4314 if (++NumInRegs == MaxInRegs)
4315 return false;
4316 break;
4317 }
4318 }
4319 }
4320
4321 const MachineRegisterInfo &MRI = MF.getRegInfo();
4322 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4323 return false;
4324 }
4325
4326 bool CalleeWillPop =
4327 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4328 MF.getTarget().Options.GuaranteedTailCallOpt);
4329
4330 if (unsigned BytesToPop =
4331 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4332 // If we have bytes to pop, the callee must pop them.
4333 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4334 if (!CalleePopMatches)
4335 return false;
4336 } else if (CalleeWillPop && StackArgsSize > 0) {
4337 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4338 return false;
4339 }
4340
4341 return true;
4342}
4343
4344FastISel *
4345X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4346 const TargetLibraryInfo *libInfo) const {
4347 return X86::createFastISel(funcInfo, libInfo);
4348}
4349
4350//===----------------------------------------------------------------------===//
4351// Other Lowering Hooks
4352//===----------------------------------------------------------------------===//
4353
4354static bool MayFoldLoad(SDValue Op) {
4355 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4356}
4357
4358static bool MayFoldIntoStore(SDValue Op) {
4359 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4360}
4361
4362static bool MayFoldIntoZeroExtend(SDValue Op) {
4363 if (Op.hasOneUse()) {
4364 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4365 return (ISD::ZERO_EXTEND == Opcode);
4366 }
4367 return false;
4368}
4369
4370static bool isTargetShuffle(unsigned Opcode) {
4371 switch(Opcode) {
4372 default: return false;
4373 case X86ISD::BLENDI:
4374 case X86ISD::PSHUFB:
4375 case X86ISD::PSHUFD:
4376 case X86ISD::PSHUFHW:
4377 case X86ISD::PSHUFLW:
4378 case X86ISD::SHUFP:
4379 case X86ISD::INSERTPS:
4380 case X86ISD::EXTRQI:
4381 case X86ISD::INSERTQI:
4382 case X86ISD::PALIGNR:
4383 case X86ISD::VSHLDQ:
4384 case X86ISD::VSRLDQ:
4385 case X86ISD::MOVLHPS:
4386 case X86ISD::MOVHLPS:
4387 case X86ISD::MOVSHDUP:
4388 case X86ISD::MOVSLDUP:
4389 case X86ISD::MOVDDUP:
4390 case X86ISD::MOVSS:
4391 case X86ISD::MOVSD:
4392 case X86ISD::UNPCKL:
4393 case X86ISD::UNPCKH:
4394 case X86ISD::VBROADCAST:
4395 case X86ISD::VPERMILPI:
4396 case X86ISD::VPERMILPV:
4397 case X86ISD::VPERM2X128:
4398 case X86ISD::SHUF128:
4399 case X86ISD::VPERMIL2:
4400 case X86ISD::VPERMI:
4401 case X86ISD::VPPERM:
4402 case X86ISD::VPERMV:
4403 case X86ISD::VPERMV3:
4404 case X86ISD::VZEXT_MOVL:
4405 return true;
4406 }
4407}
4408
4409static bool isTargetShuffleVariableMask(unsigned Opcode) {
4410 switch (Opcode) {
4411 default: return false;
4412 // Target Shuffles.
4413 case X86ISD::PSHUFB:
4414 case X86ISD::VPERMILPV:
4415 case X86ISD::VPERMIL2:
4416 case X86ISD::VPPERM:
4417 case X86ISD::VPERMV:
4418 case X86ISD::VPERMV3:
4419 return true;
4420 // 'Faux' Target Shuffles.
4421 case ISD::OR:
4422 case ISD::AND:
4423 case X86ISD::ANDNP:
4424 return true;
4425 }
4426}
4427
4428SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4429 MachineFunction &MF = DAG.getMachineFunction();
4430 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4431 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4432 int ReturnAddrIndex = FuncInfo->getRAIndex();
4433
4434 if (ReturnAddrIndex == 0) {
4435 // Set up a frame object for the return address.
4436 unsigned SlotSize = RegInfo->getSlotSize();
4437 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4438 -(int64_t)SlotSize,
4439 false);
4440 FuncInfo->setRAIndex(ReturnAddrIndex);
4441 }
4442
4443 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4444}
4445
4446bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4447 bool hasSymbolicDisplacement) {
4448 // Offset should fit into 32 bit immediate field.
4449 if (!isInt<32>(Offset))
4450 return false;
4451
4452 // If we don't have a symbolic displacement - we don't have any extra
4453 // restrictions.
4454 if (!hasSymbolicDisplacement)
4455 return true;
4456
4457 // FIXME: Some tweaks might be needed for medium code model.
4458 if (M != CodeModel::Small && M != CodeModel::Kernel)
4459 return false;
4460
4461 // For small code model we assume that latest object is 16MB before end of 31
4462 // bits boundary. We may also accept pretty large negative constants knowing
4463 // that all objects are in the positive half of address space.
4464 if (M == CodeModel::Small && Offset < 16*1024*1024)
4465 return true;
4466
4467 // For kernel code model we know that all object resist in the negative half
4468 // of 32bits address space. We may not accept negative offsets, since they may
4469 // be just off and we may accept pretty large positive ones.
4470 if (M == CodeModel::Kernel && Offset >= 0)
4471 return true;
4472
4473 return false;
4474}
4475
4476/// Determines whether the callee is required to pop its own arguments.
4477/// Callee pop is necessary to support tail calls.
4478bool X86::isCalleePop(CallingConv::ID CallingConv,
4479 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4480 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4481 // can guarantee TCO.
4482 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4483 return true;
4484
4485 switch (CallingConv) {
4486 default:
4487 return false;
4488 case CallingConv::X86_StdCall:
4489 case CallingConv::X86_FastCall:
4490 case CallingConv::X86_ThisCall:
4491 case CallingConv::X86_VectorCall:
4492 return !is64Bit;
4493 }
4494}
4495
4496/// Return true if the condition is an unsigned comparison operation.
4497static bool isX86CCUnsigned(unsigned X86CC) {
4498 switch (X86CC) {
4499 default:
4500 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 4500)
;
4501 case X86::COND_E:
4502 case X86::COND_NE:
4503 case X86::COND_B:
4504 case X86::COND_A:
4505 case X86::COND_BE:
4506 case X86::COND_AE:
4507 return true;
4508 case X86::COND_G:
4509 case X86::COND_GE:
4510 case X86::COND_L:
4511 case X86::COND_LE:
4512 return false;
4513 }
4514}
4515
4516static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4517 switch (SetCCOpcode) {
4518 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 4518)
;
4519 case ISD::SETEQ: return X86::COND_E;
4520 case ISD::SETGT: return X86::COND_G;
4521 case ISD::SETGE: return X86::COND_GE;
4522 case ISD::SETLT: return X86::COND_L;
4523 case ISD::SETLE: return X86::COND_LE;
4524 case ISD::SETNE: return X86::COND_NE;
4525 case ISD::SETULT: return X86::COND_B;
4526 case ISD::SETUGT: return X86::COND_A;
4527 case ISD::SETULE: return X86::COND_BE;
4528 case ISD::SETUGE: return X86::COND_AE;
4529 }
4530}
4531
4532/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4533/// condition code, returning the condition code and the LHS/RHS of the
4534/// comparison to make.
4535static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4536 bool isFP, SDValue &LHS, SDValue &RHS,
4537 SelectionDAG &DAG) {
4538 if (!isFP) {
4539 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4540 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4541 // X > -1 -> X == 0, jump !sign.
4542 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4543 return X86::COND_NS;
4544 }
4545 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4546 // X < 0 -> X == 0, jump on sign.
4547 return X86::COND_S;
4548 }
4549 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4550 // X < 1 -> X <= 0
4551 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4552 return X86::COND_LE;
4553 }
4554 }
4555
4556 return TranslateIntegerX86CC(SetCCOpcode);
4557 }
4558
4559 // First determine if it is required or is profitable to flip the operands.
4560
4561 // If LHS is a foldable load, but RHS is not, flip the condition.
4562 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4563 !ISD::isNON_EXTLoad(RHS.getNode())) {
4564 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4565 std::swap(LHS, RHS);
4566 }
4567
4568 switch (SetCCOpcode) {
4569 default: break;
4570 case ISD::SETOLT:
4571 case ISD::SETOLE:
4572 case ISD::SETUGT:
4573 case ISD::SETUGE:
4574 std::swap(LHS, RHS);
4575 break;
4576 }
4577
4578 // On a floating point condition, the flags are set as follows:
4579 // ZF PF CF op
4580 // 0 | 0 | 0 | X > Y
4581 // 0 | 0 | 1 | X < Y
4582 // 1 | 0 | 0 | X == Y
4583 // 1 | 1 | 1 | unordered
4584 switch (SetCCOpcode) {
4585 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 4585)
;
4586 case ISD::SETUEQ:
4587 case ISD::SETEQ: return X86::COND_E;
4588 case ISD::SETOLT: // flipped
4589 case ISD::SETOGT:
4590 case ISD::SETGT: return X86::COND_A;
4591 case ISD::SETOLE: // flipped
4592 case ISD::SETOGE:
4593 case ISD::SETGE: return X86::COND_AE;
4594 case ISD::SETUGT: // flipped
4595 case ISD::SETULT:
4596 case ISD::SETLT: return X86::COND_B;
4597 case ISD::SETUGE: // flipped
4598 case ISD::SETULE:
4599 case ISD::SETLE: return X86::COND_BE;
4600 case ISD::SETONE:
4601 case ISD::SETNE: return X86::COND_NE;
4602 case ISD::SETUO: return X86::COND_P;
4603 case ISD::SETO: return X86::COND_NP;
4604 case ISD::SETOEQ:
4605 case ISD::SETUNE: return X86::COND_INVALID;
4606 }
4607}
4608
4609/// Is there a floating point cmov for the specific X86 condition code?
4610/// Current x86 isa includes the following FP cmov instructions:
4611/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4612static bool hasFPCMov(unsigned X86CC) {
4613 switch (X86CC) {
4614 default:
4615 return false;
4616 case X86::COND_B:
4617 case X86::COND_BE:
4618 case X86::COND_E:
4619 case X86::COND_P:
4620 case X86::COND_A:
4621 case X86::COND_AE:
4622 case X86::COND_NE:
4623 case X86::COND_NP:
4624 return true;
4625 }
4626}
4627
4628
4629bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4630 const CallInst &I,
4631 MachineFunction &MF,
4632 unsigned Intrinsic) const {
4633
4634 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4635 if (!IntrData)
4636 return false;
4637
4638 Info.opc = ISD::INTRINSIC_W_CHAIN;
4639 Info.flags = MachineMemOperand::MONone;
4640 Info.offset = 0;
4641
4642 switch (IntrData->Type) {
4643 case TRUNCATE_TO_MEM_VI8:
4644 case TRUNCATE_TO_MEM_VI16:
4645 case TRUNCATE_TO_MEM_VI32: {
4646 Info.ptrVal = I.getArgOperand(0);
4647 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4648 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4649 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4650 ScalarVT = MVT::i8;
4651 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4652 ScalarVT = MVT::i16;
4653 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4654 ScalarVT = MVT::i32;
4655
4656 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4657 Info.align = 1;
4658 Info.flags |= MachineMemOperand::MOStore;
4659 break;
4660 }
4661 default:
4662 return false;
4663 }
4664
4665 return true;
4666}
4667
4668/// Returns true if the target can instruction select the
4669/// specified FP immediate natively. If false, the legalizer will
4670/// materialize the FP immediate as a load from a constant pool.
4671bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4672 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4673 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4674 return true;
4675 }
4676 return false;
4677}
4678
4679bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4680 ISD::LoadExtType ExtTy,
4681 EVT NewVT) const {
4682 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4683 // relocation target a movq or addq instruction: don't let the load shrink.
4684 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4685 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4686 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4687 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4688 return true;
4689}
4690
4691/// Returns true if it is beneficial to convert a load of a constant
4692/// to just the constant itself.
4693bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4694 Type *Ty) const {
4695 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 4695, __PRETTY_FUNCTION__))
;
4696
4697 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4698 if (BitSize == 0 || BitSize > 64)
4699 return false;
4700 return true;
4701}
4702
4703bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4704 // TODO: It might be a win to ease or lift this restriction, but the generic
4705 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4706 if (VT.isVector() && Subtarget.hasAVX512())
4707 return false;
4708
4709 return true;
4710}
4711
4712bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
4713 // TODO: We handle scalars using custom code, but generic combining could make
4714 // that unnecessary.
4715 APInt MulC;
4716 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
4717 return false;
4718
4719 // If vector multiply is legal, assume that's faster than shl + add/sub.
4720 // TODO: Multiply is a complex op with higher latency and lower througput in
4721 // most implementations, so this check could be loosened based on type
4722 // and/or a CPU attribute.
4723 if (isOperationLegal(ISD::MUL, VT))
4724 return false;
4725
4726 // shl+add, shl+sub, shl+add+neg
4727 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
4728 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
4729}
4730
4731bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4732 unsigned Index) const {
4733 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4734 return false;
4735
4736 // Mask vectors support all subregister combinations and operations that
4737 // extract half of vector.
4738 if (ResVT.getVectorElementType() == MVT::i1)
4739 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4740 (Index == ResVT.getVectorNumElements()));
4741
4742 return (Index % ResVT.getVectorNumElements()) == 0;
4743}
4744
4745bool X86TargetLowering::isCheapToSpeculateCttz() const {
4746 // Speculate cttz only if we can directly use TZCNT.
4747 return Subtarget.hasBMI();
4748}
4749
4750bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4751 // Speculate ctlz only if we can directly use LZCNT.
4752 return Subtarget.hasLZCNT();
4753}
4754
4755bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
4756 EVT BitcastVT) const {
4757 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1)
4758 return false;
4759
4760 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
4761}
4762
4763bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
4764 const SelectionDAG &DAG) const {
4765 // Do not merge to float value size (128 bytes) if no implicit
4766 // float attribute is set.
4767 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
4768 Attribute::NoImplicitFloat);
4769
4770 if (NoFloat) {
4771 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
4772 return (MemVT.getSizeInBits() <= MaxIntSize);
4773 }
4774 return true;
4775}
4776
4777bool X86TargetLowering::isCtlzFast() const {
4778 return Subtarget.hasFastLZCNT();
4779}
4780
4781bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4782 const Instruction &AndI) const {
4783 return true;
4784}
4785
4786bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4787 EVT VT = Y.getValueType();
4788
4789 if (VT.isVector())
4790 return false;
4791
4792 if (!Subtarget.hasBMI())
4793 return false;
4794
4795 // There are only 32-bit and 64-bit forms for 'andn'.
4796 if (VT != MVT::i32 && VT != MVT::i64)
4797 return false;
4798
4799 // A mask and compare against constant is ok for an 'andn' too
4800 // even though the BMI instruction doesn't have an immediate form.
4801
4802 return true;
4803}
4804
4805bool X86TargetLowering::hasAndNot(SDValue Y) const {
4806 EVT VT = Y.getValueType();
4807
4808 if (!VT.isVector()) // x86 can't form 'andn' with an immediate.
4809 return !isa<ConstantSDNode>(Y) && hasAndNotCompare(Y);
4810
4811 // Vector.
4812
4813 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
4814 return false;
4815
4816 if (VT == MVT::v4i32)
4817 return true;
4818
4819 return Subtarget.hasSSE2();
4820}
4821
4822bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
4823 EVT VT = Y.getValueType();
4824
4825 // For vectors, we don't have a preference, but we probably want a mask.
4826 if (VT.isVector())
4827 return false;
4828
4829 // 64-bit shifts on 32-bit targets produce really bad bloated code.
4830 if (VT == MVT::i64 && !Subtarget.is64Bit())
4831 return false;
4832
4833 return true;
4834}
4835
4836bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
4837 // Any legal vector type can be splatted more efficiently than
4838 // loading/spilling from memory.
4839 return isTypeLegal(VT);
4840}
4841
4842MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
4843 MVT VT = MVT::getIntegerVT(NumBits);
4844 if (isTypeLegal(VT))
4845 return VT;
4846
4847 // PMOVMSKB can handle this.
4848 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
4849 return MVT::v16i8;
4850
4851 // VPMOVMSKB can handle this.
4852 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
4853 return MVT::v32i8;
4854
4855 // TODO: Allow 64-bit type for 32-bit target.
4856 // TODO: 512-bit types should be allowed, but make sure that those
4857 // cases are handled in combineVectorSizedSetCCEquality().
4858
4859 return MVT::INVALID_SIMPLE_VALUE_TYPE;
4860}
4861
4862/// Val is the undef sentinel value or equal to the specified value.
4863static bool isUndefOrEqual(int Val, int CmpVal) {
4864 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
4865}
4866
4867/// Val is either the undef or zero sentinel value.
4868static bool isUndefOrZero(int Val) {
4869 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
4870}
4871
4872/// Return true if every element in Mask, beginning
4873/// from position Pos and ending in Pos+Size is the undef sentinel value.
4874static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
4875 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4876 if (Mask[i] != SM_SentinelUndef)
4877 return false;
4878 return true;
4879}
4880
/// Return true if \p Val falls within the half-open range [Low, Hi), i.e.
/// \p Low is included and \p Hi is excluded. An empty range (Low >= Hi)
/// contains no value.
static bool isInRange(int Val, int Low, int Hi) {
  return (Val >= Low && Val < Hi);
}
4885
4886/// Return true if the value of any element in Mask falls within the specified
4887/// range (L, H].
4888static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
4889 for (int M : Mask)
4890 if (isInRange(M, Low, Hi))
4891 return true;
4892 return false;
4893}
4894
4895/// Return true if Val is undef or if its value falls within the
4896/// specified range (L, H].
4897static bool isUndefOrInRange(int Val, int Low, int Hi) {
4898 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
4899}
4900
4901/// Return true if every element in Mask is undef or if its value
4902/// falls within the specified range (L, H].
4903static bool isUndefOrInRange(ArrayRef<int> Mask,
4904 int Low, int Hi) {
4905 for (int M : Mask)
4906 if (!isUndefOrInRange(M, Low, Hi))
4907 return false;
4908 return true;
4909}
4910
4911/// Return true if Val is undef, zero or if its value falls within the
4912/// specified range (L, H].
4913static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
4914 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
4915}
4916
4917/// Return true if every element in Mask is undef, zero or if its value
4918/// falls within the specified range (L, H].
4919static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
4920 for (int M : Mask)
4921 if (!isUndefOrZeroOrInRange(M, Low, Hi))
4922 return false;
4923 return true;
4924}
4925
4926/// Return true if every element in Mask, beginning
4927/// from position Pos and ending in Pos + Size, falls within the specified
4928/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
4929static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
4930 unsigned Size, int Low, int Step = 1) {
4931 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
4932 if (!isUndefOrEqual(Mask[i], Low))
4933 return false;
4934 return true;
4935}
4936
4937/// Return true if every element in Mask, beginning
4938/// from position Pos and ending in Pos+Size, falls within the specified
4939/// sequential range (Low, Low+Size], or is undef or is zero.
4940static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4941 unsigned Size, int Low) {
4942 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
4943 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
4944 return false;
4945 return true;
4946}
4947
4948/// Return true if every element in Mask, beginning
4949/// from position Pos and ending in Pos+Size is undef or is zero.
4950static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4951 unsigned Size) {
4952 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4953 if (!isUndefOrZero(Mask[i]))
4954 return false;
4955 return true;
4956}
4957
4958/// Helper function to test whether a shuffle mask could be
4959/// simplified by widening the elements being shuffled.
4960///
4961/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
4962/// leaves it in an unspecified state.
4963///
4964/// NOTE: This must handle normal vector shuffle masks and *target* vector
4965/// shuffle masks. The latter have the special property of a '-2' representing
4966/// a zero-ed lane of a vector.
4967static bool canWidenShuffleElements(ArrayRef<int> Mask,
4968 SmallVectorImpl<int> &WidenedMask) {
4969 WidenedMask.assign(Mask.size() / 2, 0);
4970 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
4971 int M0 = Mask[i];
4972 int M1 = Mask[i + 1];
4973
4974 // If both elements are undef, its trivial.
4975 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
4976 WidenedMask[i / 2] = SM_SentinelUndef;
4977 continue;
4978 }
4979
4980 // Check for an undef mask and a mask value properly aligned to fit with
4981 // a pair of values. If we find such a case, use the non-undef mask's value.
4982 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
4983 WidenedMask[i / 2] = M1 / 2;
4984 continue;
4985 }
4986 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
4987 WidenedMask[i / 2] = M0 / 2;
4988 continue;
4989 }
4990
4991 // When zeroing, we need to spread the zeroing across both lanes to widen.
4992 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
4993 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
4994 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
4995 WidenedMask[i / 2] = SM_SentinelZero;
4996 continue;
4997 }
4998 return false;
4999 }
5000
5001 // Finally check if the two mask values are adjacent and aligned with
5002 // a pair.
5003 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5004 WidenedMask[i / 2] = M0 / 2;
5005 continue;
5006 }
5007
5008 // Otherwise we can't safely widen the elements used in this shuffle.
5009 return false;
5010 }
5011 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5012, __PRETTY_FUNCTION__))
5012 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5012, __PRETTY_FUNCTION__))
;
5013
5014 return true;
5015}
5016
5017static bool canWidenShuffleElements(ArrayRef<int> Mask,
5018 const APInt &Zeroable,
5019 SmallVectorImpl<int> &WidenedMask) {
5020 SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
5021 for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
5022 if (TargetMask[i] == SM_SentinelUndef)
5023 continue;
5024 if (Zeroable[i])
5025 TargetMask[i] = SM_SentinelZero;
5026 }
5027 return canWidenShuffleElements(TargetMask, WidenedMask);
5028}
5029
5030static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5031 SmallVector<int, 32> WidenedMask;
5032 return canWidenShuffleElements(Mask, WidenedMask);
5033}
5034
5035/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5036bool X86::isZeroNode(SDValue Elt) {
5037 return isNullConstant(Elt) || isNullFPConstant(Elt);
5038}
5039
5040// Build a vector of constants.
5041// Use an UNDEF node if MaskElt == -1.
5042// Split 64-bit constants in the 32-bit mode.
5043static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5044 const SDLoc &dl, bool IsMask = false) {
5045
5046 SmallVector<SDValue, 32> Ops;
5047 bool Split = false;
5048
5049 MVT ConstVecVT = VT;
5050 unsigned NumElts = VT.getVectorNumElements();
5051 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5052 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5053 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5054 Split = true;
5055 }
5056
5057 MVT EltVT = ConstVecVT.getVectorElementType();
5058 for (unsigned i = 0; i < NumElts; ++i) {
5059 bool IsUndef = Values[i] < 0 && IsMask;
5060 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5061 DAG.getConstant(Values[i], dl, EltVT);
5062 Ops.push_back(OpNode);
5063 if (Split)
5064 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5065 DAG.getConstant(0, dl, EltVT));
5066 }
5067 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5068 if (Split)
5069 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5070 return ConstsNode;
5071}
5072
5073static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5074 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5075 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5076, __PRETTY_FUNCTION__))
5076 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5076, __PRETTY_FUNCTION__))
;
5077 SmallVector<SDValue, 32> Ops;
5078 bool Split = false;
5079
5080 MVT ConstVecVT = VT;
5081 unsigned NumElts = VT.getVectorNumElements();
5082 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5083 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5084 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5085 Split = true;
5086 }
5087
5088 MVT EltVT = ConstVecVT.getVectorElementType();
5089 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5090 if (Undefs[i]) {
5091 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5092 continue;
5093 }
5094 const APInt &V = Bits[i];
5095 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5095, __PRETTY_FUNCTION__))
;
5096 if (Split) {
5097 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5098 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5099 } else if (EltVT == MVT::f32) {
5100 APFloat FV(APFloat::IEEEsingle(), V);
5101 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5102 } else if (EltVT == MVT::f64) {
5103 APFloat FV(APFloat::IEEEdouble(), V);
5104 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5105 } else {
5106 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5107 }
5108 }
5109
5110 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5111 return DAG.getBitcast(VT, ConstsNode);
5112}
5113
5114/// Returns a vector of specified type with all zero elements.
5115static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5116 SelectionDAG &DAG, const SDLoc &dl) {
5117 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5119, __PRETTY_FUNCTION__))
5118 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5119, __PRETTY_FUNCTION__))
5119 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5119, __PRETTY_FUNCTION__))
;
5120
5121 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5122 // type. This ensures they get CSE'd. But if the integer type is not
5123 // available, use a floating-point +0.0 instead.
5124 SDValue Vec;
5125 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5126 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5127 } else if (VT.getVectorElementType() == MVT::i1) {
5128 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5129, __PRETTY_FUNCTION__))
5129 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5129, __PRETTY_FUNCTION__))
;
5130 Vec = DAG.getConstant(0, dl, VT);
5131 } else {
5132 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5133 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5134 }
5135 return DAG.getBitcast(VT, Vec);
5136}
5137
5138static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5139 const SDLoc &dl, unsigned vectorWidth) {
5140 EVT VT = Vec.getValueType();
5141 EVT ElVT = VT.getVectorElementType();
5142 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5143 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5144 VT.getVectorNumElements()/Factor);
5145
5146 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5147 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5148 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5148, __PRETTY_FUNCTION__))
;
5149
5150 // This is the index of the first element of the vectorWidth-bit chunk
5151 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5152 IdxVal &= ~(ElemsPerChunk - 1);
5153
5154 // If the input is a buildvector just emit a smaller one.
5155 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5156 return DAG.getBuildVector(ResultVT, dl,
5157 Vec->ops().slice(IdxVal, ElemsPerChunk));
5158
5159 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5160 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5161}
5162
5163/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5164/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5165/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5166/// instructions or a simple subregister reference. Idx is an index in the
5167/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5168/// lowering EXTRACT_VECTOR_ELT operations easier.
5169static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5170 SelectionDAG &DAG, const SDLoc &dl) {
5171 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5172, __PRETTY_FUNCTION__))
5172 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5172, __PRETTY_FUNCTION__))
;
5173 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5174}
5175
5176/// Generate a DAG to grab 256-bits from a 512-bit vector.
5177static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5178 SelectionDAG &DAG, const SDLoc &dl) {
5179 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5179, __PRETTY_FUNCTION__))
;
5180 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5181}
5182
5183static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5184 SelectionDAG &DAG, const SDLoc &dl,
5185 unsigned vectorWidth) {
5186 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5187, __PRETTY_FUNCTION__))
5187 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5187, __PRETTY_FUNCTION__))
;
5188 // Inserting UNDEF is Result
5189 if (Vec.isUndef())
5190 return Result;
5191 EVT VT = Vec.getValueType();
5192 EVT ElVT = VT.getVectorElementType();
5193 EVT ResultVT = Result.getValueType();
5194
5195 // Insert the relevant vectorWidth bits.
5196 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5197 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5197, __PRETTY_FUNCTION__))
;
5198
5199 // This is the index of the first element of the vectorWidth-bit chunk
5200 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5201 IdxVal &= ~(ElemsPerChunk - 1);
5202
5203 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5204 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5205}
5206
5207/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5208/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5209/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5210/// simple superregister reference. Idx is an index in the 128 bits
5211/// we want. It need not be aligned to a 128-bit boundary. That makes
5212/// lowering INSERT_VECTOR_ELT operations easier.
5213static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5214 SelectionDAG &DAG, const SDLoc &dl) {
5215 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5215, __PRETTY_FUNCTION__))
;
5216 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5217}
5218
5219/// Widen a vector to a larger size with the same scalar type, with the new
5220/// elements either zero or undef.
5221static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5222 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5223 const SDLoc &dl) {
5224 assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5226, __PRETTY_FUNCTION__))
5225 Vec.getValueType().getScalarType() == VT.getScalarType() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5226, __PRETTY_FUNCTION__))
5226 "Unsupported vector widening type")((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5226, __PRETTY_FUNCTION__))
;
5227 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5228 : DAG.getUNDEF(VT);
5229 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5230 DAG.getIntPtrConstant(0, dl));
5231}
5232
5233// Helper for splitting operands of an operation to legal target size and
5234// apply a function on each part.
5235// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5236// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
5237// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5238// The argument Builder is a function that will be applied on each split part:
5239// SDValue Builder(SelectionDAG&G, SDLoc, ArrayRef<SDValue>)
5240template <typename F>
5241SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5242 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
5243 F Builder, bool CheckBWI = true) {
5244 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2")((Subtarget.hasSSE2() && "Target assumed to support at least SSE2"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasSSE2() && \"Target assumed to support at least SSE2\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5244, __PRETTY_FUNCTION__))
;
5245 unsigned NumSubs = 1;
5246 if ((CheckBWI && Subtarget.useBWIRegs()) ||
5247 (!CheckBWI && Subtarget.useAVX512Regs())) {
5248 if (VT.getSizeInBits() > 512) {
5249 NumSubs = VT.getSizeInBits() / 512;
5250 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 512) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 512) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5250, __PRETTY_FUNCTION__))
;
5251 }
5252 } else if (Subtarget.hasAVX2()) {
5253 if (VT.getSizeInBits() > 256) {
5254 NumSubs = VT.getSizeInBits() / 256;
5255 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 256) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 256) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5255, __PRETTY_FUNCTION__))
;
5256 }
5257 } else {
5258 if (VT.getSizeInBits() > 128) {
5259 NumSubs = VT.getSizeInBits() / 128;
5260 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 128) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 128) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5260, __PRETTY_FUNCTION__))
;
5261 }
5262 }
5263
5264 if (NumSubs == 1)
5265 return Builder(DAG, DL, Ops);
5266
5267 SmallVector<SDValue, 4> Subs;
5268 for (unsigned i = 0; i != NumSubs; ++i) {
5269 SmallVector<SDValue, 2> SubOps;
5270 for (SDValue Op : Ops) {
5271 EVT OpVT = Op.getValueType();
5272 unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
5273 unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
5274 SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
5275 }
5276 Subs.push_back(Builder(DAG, DL, SubOps));
5277 }
5278 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5279}
5280
5281// Return true if the instruction zeroes the unused upper part of the
5282// destination and accepts mask.
5283static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
5284 switch (Opcode) {
5285 default:
5286 return false;
5287 case X86ISD::CMPM:
5288 case X86ISD::CMPM_RND:
5289 case ISD::SETCC:
5290 return true;
5291 }
5292}
5293
5294/// Insert i1-subvector to i1-vector.
5295static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5296 const X86Subtarget &Subtarget) {
5297
5298 SDLoc dl(Op);
5299 SDValue Vec = Op.getOperand(0);
5300 SDValue SubVec = Op.getOperand(1);
5301 SDValue Idx = Op.getOperand(2);
5302
5303 if (!isa<ConstantSDNode>(Idx))
5304 return SDValue();
5305
5306 // Inserting undef is a nop. We can just return the original vector.
5307 if (SubVec.isUndef())
5308 return Vec;
5309
5310 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5311 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5312 return Op;
5313
5314 MVT OpVT = Op.getSimpleValueType();
5315 unsigned NumElems = OpVT.getVectorNumElements();
5316
5317 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5318
5319 // Extend to natively supported kshift.
5320 MVT WideOpVT = OpVT;
5321 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5322 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5323
5324 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5325 // if necessary.
5326 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5327 // May need to promote to a legal type.
5328 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5329 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5330 SubVec, Idx);
5331 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5332 }
5333
5334 MVT SubVecVT = SubVec.getSimpleValueType();
5335 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5336
5337 assert(IdxVal + SubVecNumElems <= NumElems &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5339, __PRETTY_FUNCTION__))
5338 IdxVal % SubVecVT.getSizeInBits() == 0 &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5339, __PRETTY_FUNCTION__))
5339 "Unexpected index value in INSERT_SUBVECTOR")((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5339, __PRETTY_FUNCTION__))
;
5340
5341 SDValue Undef = DAG.getUNDEF(WideOpVT);
5342
5343 if (IdxVal == 0) {
5344 // Zero lower bits of the Vec
5345 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5346 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
5347 ZeroIdx);
5348 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5349 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5350 // Merge them together, SubVec should be zero extended.
5351 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5352 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5353 SubVec, ZeroIdx);
5354 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5355 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5356 }
5357
5358 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5359 Undef, SubVec, ZeroIdx);
5360
5361 if (Vec.isUndef()) {
5362 assert(IdxVal != 0 && "Unexpected index")((IdxVal != 0 && "Unexpected index") ? static_cast<
void> (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5362, __PRETTY_FUNCTION__))
;
5363 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5364 DAG.getConstant(IdxVal, dl, MVT::i8));
5365 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5366 }
5367
5368 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5369 assert(IdxVal != 0 && "Unexpected index")((IdxVal != 0 && "Unexpected index") ? static_cast<
void> (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5369, __PRETTY_FUNCTION__))
;
5370 NumElems = WideOpVT.getVectorNumElements();
5371 unsigned ShiftLeft = NumElems - SubVecNumElems;
5372 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5373 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5374 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5375 if (ShiftRight != 0)
5376 SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
5377 DAG.getConstant(ShiftRight, dl, MVT::i8));
5378 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5379 }
5380
5381 // Simple case when we put subvector in the upper part
5382 if (IdxVal + SubVecNumElems == NumElems) {
5383 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5384 DAG.getConstant(IdxVal, dl, MVT::i8));
5385 if (SubVecNumElems * 2 == NumElems) {
5386 // Special case, use legal zero extending insert_subvector. This allows
5387 // isel to opimitize when bits are known zero.
5388 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
5389 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5390 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5391 Vec, ZeroIdx);
5392 } else {
5393 // Otherwise use explicit shifts to zero the bits.
5394 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5395 Undef, Vec, ZeroIdx);
5396 NumElems = WideOpVT.getVectorNumElements();
5397 SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
5398 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5399 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5400 }
5401 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5402 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5403 }
5404
5405 // Inserting into the middle is more complicated.
5406
5407 NumElems = WideOpVT.getVectorNumElements();
5408
5409 // Widen the vector if needed.
5410 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5411 // Move the current value of the bit to be replace to the lsbs.
5412 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5413 DAG.getConstant(IdxVal, dl, MVT::i8));
5414 // Xor with the new bit.
5415 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
5416 // Shift to MSB, filling bottom bits with 0.
5417 unsigned ShiftLeft = NumElems - SubVecNumElems;
5418 Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
5419 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5420 // Shift to the final position, filling upper bits with 0.
5421 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5422 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
5423 DAG.getConstant(ShiftRight, dl, MVT::i8));
5424 // Xor with original vector leaving the new value.
5425 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
5426 // Reduce to original width if needed.
5427 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5428}
5429
5430static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT,
5431 unsigned NumElems, SelectionDAG &DAG,
5432 const SDLoc &dl, unsigned VectorWidth) {
5433 SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth);
5434 return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth);
5435}
5436
5437/// Returns a vector of specified type with all bits set.
5438/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5439/// Then bitcast to their original type, ensuring they get CSE'd.
5440static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5441 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5442, __PRETTY_FUNCTION__))
5442 "Expected a 128/256/512-bit vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5442, __PRETTY_FUNCTION__))
;
5443
5444 APInt Ones = APInt::getAllOnesValue(32);
5445 unsigned NumElts = VT.getSizeInBits() / 32;
5446 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5447 return DAG.getBitcast(VT, Vec);
5448}
5449
5450static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
5451 SelectionDAG &DAG) {
5452 EVT InVT = In.getValueType();
5453 assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode")(((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode"
) ? static_cast<void> (0) : __assert_fail ("(X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && \"Unexpected opcode\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5453, __PRETTY_FUNCTION__))
;
5454
5455 if (VT.is128BitVector() && InVT.is128BitVector())
5456 return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
5457 : DAG.getZeroExtendVectorInReg(In, DL, VT);
5458
5459 // For 256-bit vectors, we only need the lower (128-bit) input half.
5460 // For 512-bit vectors, we only need the lower input half or quarter.
5461 if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
5462 int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5463 In = extractSubVector(In, 0, DAG, DL,
5464 std::max(128, (int)VT.getSizeInBits() / Scale));
5465 }
5466
5467 return DAG.getNode(Opc, DL, VT, In);
5468}
5469
5470/// Returns a vector_shuffle node for an unpackl operation.
5471static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5472 SDValue V1, SDValue V2) {
5473 SmallVector<int, 8> Mask;
5474 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5475 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5476}
5477
5478/// Returns a vector_shuffle node for an unpackh operation.
5479static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5480 SDValue V1, SDValue V2) {
5481 SmallVector<int, 8> Mask;
5482 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5483 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5484}
5485
5486/// Return a vector_shuffle of the specified vector of zero or undef vector.
5487/// This produces a shuffle where the low element of V2 is swizzled into the
5488/// zero/undef vector, landing at element Idx.
5489/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
5490static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5491 bool IsZero,
5492 const X86Subtarget &Subtarget,
5493 SelectionDAG &DAG) {
5494 MVT VT = V2.getSimpleValueType();
5495 SDValue V1 = IsZero
5496 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5497 int NumElems = VT.getVectorNumElements();
5498 SmallVector<int, 16> MaskVec(NumElems);
5499 for (int i = 0; i != NumElems; ++i)
5500 // If this is the insertion idx, put the low elt of V2 here.
5501 MaskVec[i] = (i == Idx) ? NumElems : i;
5502 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5503}
5504
5505// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
5506static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
5507 while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
5508 V = V.getOperand(0);
5509 return V;
5510}
5511
5512static const Constant *getTargetConstantFromNode(SDValue Op) {
5513 Op = peekThroughBitcasts(Op);
5514
5515 auto *Load = dyn_cast<LoadSDNode>(Op);
5516 if (!Load)
5517 return nullptr;
5518
5519 SDValue Ptr = Load->getBasePtr();
5520 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5521 Ptr->getOpcode() == X86ISD::WrapperRIP)
5522 Ptr = Ptr->getOperand(0);
5523
5524 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5525 if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
5526 return nullptr;
5527
5528 return CNode->getConstVal();
5529}
5530
5531// Extract raw constant bits from constant pools.
5532static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5533 APInt &UndefElts,
5534 SmallVectorImpl<APInt> &EltBits,
5535 bool AllowWholeUndefs = true,
5536 bool AllowPartialUndefs = true) {
5537 assert(EltBits.empty() && "Expected an empty EltBits vector")((EltBits.empty() && "Expected an empty EltBits vector"
) ? static_cast<void> (0) : __assert_fail ("EltBits.empty() && \"Expected an empty EltBits vector\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5537, __PRETTY_FUNCTION__))
;
5538
5539 Op = peekThroughBitcasts(Op);
5540
5541 EVT VT = Op.getValueType();
5542 unsigned SizeInBits = VT.getSizeInBits();
5543 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!")(((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"
) ? static_cast<void> (0) : __assert_fail ("(SizeInBits % EltSizeInBits) == 0 && \"Can't split constant!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5543, __PRETTY_FUNCTION__))
;
5544 unsigned NumElts = SizeInBits / EltSizeInBits;
5545
5546 // Bitcast a source array of element bits to the target size.
5547 auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
5548 unsigned NumSrcElts = UndefSrcElts.getBitWidth();
5549 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
5550 assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&(((NumSrcElts * SrcEltSizeInBits) == SizeInBits && "Constant bit sizes don't match"
) ? static_cast<void> (0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5551, __PRETTY_FUNCTION__))
5551 "Constant bit sizes don't match")(((NumSrcElts * SrcEltSizeInBits) == SizeInBits && "Constant bit sizes don't match"
) ? static_cast<void> (0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5551, __PRETTY_FUNCTION__))
;
5552
5553 // Don't split if we don't allow undef bits.
5554 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5555 if (UndefSrcElts.getBoolValue() && !AllowUndefs)
5556 return false;
5557
5558 // If we're already the right size, don't bother bitcasting.
5559 if (NumSrcElts == NumElts) {
5560 UndefElts = UndefSrcElts;
5561 EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
5562 return true;
5563 }
5564
5565 // Extract all the undef/constant element data and pack into single bitsets.
5566 APInt UndefBits(SizeInBits, 0);
5567 APInt MaskBits(SizeInBits, 0);
5568
5569 for (unsigned i = 0; i != NumSrcElts; ++i) {
5570 unsigned BitOffset = i * SrcEltSizeInBits;
5571 if (UndefSrcElts[i])
5572 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5573 MaskBits.insertBits(SrcEltBits[i], BitOffset);
5574 }
5575
5576 // Split the undef/constant single bitset data into the target elements.
5577 UndefElts = APInt(NumElts, 0);
5578 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5579
5580 for (unsigned i = 0; i != NumElts; ++i) {
5581 unsigned BitOffset = i * EltSizeInBits;
5582 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5583
5584 // Only treat an element as UNDEF if all bits are UNDEF.
5585 if (UndefEltBits.isAllOnesValue()) {
5586 if (!AllowWholeUndefs)
5587 return false;
5588 UndefElts.setBit(i);
5589 continue;
5590 }
5591
5592 // If only some bits are UNDEF then treat them as zero (or bail if not
5593 // supported).
5594 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5595 return false;
5596
5597 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5598 EltBits[i] = Bits.getZExtValue();
5599 }
5600 return true;
5601 };
5602
5603 // Collect constant bits and insert into mask/undef bit masks.
5604 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5605 unsigned UndefBitIndex) {
5606 if (!Cst)
5607 return false;
5608 if (isa<UndefValue>(Cst)) {
5609 Undefs.setBit(UndefBitIndex);
5610 return true;
5611 }
5612 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5613 Mask = CInt->getValue();
5614 return true;
5615 }
5616 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5617 Mask = CFP->getValueAPF().bitcastToAPInt();
5618 return true;
5619 }
5620 return false;
5621 };
5622
5623 // Handle UNDEFs.
5624 if (Op.isUndef()) {
5625 APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
5626 SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
5627 return CastBitData(UndefSrcElts, SrcEltBits);
5628 }
5629
5630 // Extract scalar constant bits.
5631 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
5632 APInt UndefSrcElts = APInt::getNullValue(1);
5633 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
5634 return CastBitData(UndefSrcElts, SrcEltBits);
5635 }
5636 if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
5637 APInt UndefSrcElts = APInt::getNullValue(1);
5638 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5639 SmallVector<APInt, 64> SrcEltBits(1, RawBits);
5640 return CastBitData(UndefSrcElts, SrcEltBits);
5641 }
5642
5643 // Extract constant bits from build vector.
5644 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5645 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5646 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5647
5648 APInt UndefSrcElts(NumSrcElts, 0);
5649 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5650 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5651 const SDValue &Src = Op.getOperand(i);
5652 if (Src.isUndef()) {
5653 UndefSrcElts.setBit(i);
5654 continue;
5655 }
5656 auto *Cst = cast<ConstantSDNode>(Src);
5657 SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5658 }
5659 return CastBitData(UndefSrcElts, SrcEltBits);
5660 }
5661 if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
5662 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5663 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5664
5665 APInt UndefSrcElts(NumSrcElts, 0);
5666 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5667 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5668 const SDValue &Src = Op.getOperand(i);
5669 if (Src.isUndef()) {
5670 UndefSrcElts.setBit(i);
5671 continue;
5672 }
5673 auto *Cst = cast<ConstantFPSDNode>(Src);
5674 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5675 SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
5676 }
5677 return CastBitData(UndefSrcElts, SrcEltBits);
5678 }
5679
5680 // Extract constant bits from constant pool vector.
5681 if (auto *Cst = getTargetConstantFromNode(Op)) {
5682 Type *CstTy = Cst->getType();
5683 unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
5684 if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
5685 return false;
5686
5687 unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
5688 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5689
5690 APInt UndefSrcElts(NumSrcElts, 0);
5691 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5692 for (unsigned i = 0; i != NumSrcElts; ++i)
5693 if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
5694 UndefSrcElts, i))
5695 return false;
5696
5697 return CastBitData(UndefSrcElts, SrcEltBits);
5698 }
5699
5700 // Extract constant bits from a broadcasted constant pool scalar.
5701 if (Op.getOpcode() == X86ISD::VBROADCAST &&
5702 EltSizeInBits <= VT.getScalarSizeInBits()) {
5703 if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5704 unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
5705 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5706
5707 APInt UndefSrcElts(NumSrcElts, 0);
5708 SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
5709 if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
5710 if (UndefSrcElts[0])
5711 UndefSrcElts.setBits(0, NumSrcElts);
5712 SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
5713 return CastBitData(UndefSrcElts, SrcEltBits);
5714 }
5715 }
5716 }
5717
5718 // Extract a rematerialized scalar constant insertion.
5719 if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
5720 Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
5721 isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
5722 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5723 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5724
5725 APInt UndefSrcElts(NumSrcElts, 0);
5726 SmallVector<APInt, 64> SrcEltBits;
5727 auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
5728 SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
5729 SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
5730 return CastBitData(UndefSrcElts, SrcEltBits);
5731 }
5732
5733 // Extract constant bits from a subvector's source.
5734 if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5735 isa<ConstantSDNode>(Op.getOperand(1))) {
5736 // TODO - support extract_subvector through bitcasts.
5737 if (EltSizeInBits != VT.getScalarSizeInBits())
5738 return false;
5739
5740 if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
5741 UndefElts, EltBits, AllowWholeUndefs,
5742 AllowPartialUndefs)) {
5743 EVT SrcVT = Op.getOperand(0).getValueType();
5744 unsigned NumSrcElts = SrcVT.getVectorNumElements();
5745 unsigned NumSubElts = VT.getVectorNumElements();
5746 unsigned BaseIdx = Op.getConstantOperandVal(1);
5747 UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
5748 if ((BaseIdx + NumSubElts) != NumSrcElts)
5749 EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
5750 if (BaseIdx != 0)
5751 EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
5752 return true;
5753 }
5754 }
5755
5756 // Extract constant bits from shuffle node sources.
5757 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Op)) {
5758 // TODO - support shuffle through bitcasts.
5759 if (EltSizeInBits != VT.getScalarSizeInBits())
5760 return false;
5761
5762 ArrayRef<int> Mask = SVN->getMask();
5763 if ((!AllowWholeUndefs || !AllowPartialUndefs) &&
5764 llvm::any_of(Mask, [](int M) { return M < 0; }))
5765 return false;
5766
5767 APInt UndefElts0, UndefElts1;
5768 SmallVector<APInt, 32> EltBits0, EltBits1;
5769 if (isAnyInRange(Mask, 0, NumElts) &&
5770 !getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
5771 UndefElts0, EltBits0, AllowWholeUndefs,
5772 AllowPartialUndefs))
5773 return false;
5774 if (isAnyInRange(Mask, NumElts, 2 * NumElts) &&
5775 !getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
5776 UndefElts1, EltBits1, AllowWholeUndefs,
5777 AllowPartialUndefs))
5778 return false;
5779
5780 UndefElts = APInt::getNullValue(NumElts);
5781 for (int i = 0; i != (int)NumElts; ++i) {
5782 int M = Mask[i];
5783 if (M < 0) {
5784 UndefElts.setBit(i);
5785 EltBits.push_back(APInt::getNullValue(EltSizeInBits));
5786 } else if (M < (int)NumElts) {
5787 if (UndefElts0[M])
5788 UndefElts.setBit(i);
5789 EltBits.push_back(EltBits0[M]);
5790 } else {
5791 if (UndefElts1[M - NumElts])
5792 UndefElts.setBit(i);
5793 EltBits.push_back(EltBits1[M - NumElts]);
5794 }
5795 }
5796 return true;
5797 }
5798
5799 return false;
5800}
5801
5802static bool isConstantSplat(SDValue Op, APInt &SplatVal) {
5803 APInt UndefElts;
5804 SmallVector<APInt, 16> EltBits;
5805 if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(),
5806 UndefElts, EltBits, true, false)) {
5807 int SplatIndex = -1;
5808 for (int i = 0, e = EltBits.size(); i != e; ++i) {
5809 if (UndefElts[i])
5810 continue;
5811 if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) {
5812 SplatIndex = -1;
5813 break;
5814 }
5815 SplatIndex = i;
5816 }
5817 if (0 <= SplatIndex) {
5818 SplatVal = EltBits[SplatIndex];
5819 return true;
5820 }
5821 }
5822
5823 return false;
5824}
5825
5826static bool getTargetShuffleMaskIndices(SDValue MaskNode,
5827 unsigned MaskEltSizeInBits,
5828 SmallVectorImpl<uint64_t> &RawMask,
5829 APInt &UndefElts) {
5830 // Extract the raw target constant bits.
5831 SmallVector<APInt, 64> EltBits;
5832 if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
5833 EltBits, /* AllowWholeUndefs */ true,
5834 /* AllowPartialUndefs */ false))
5835 return false;
5836
5837 // Insert the extracted elements into the mask.
5838 for (APInt Elt : EltBits)
5839 RawMask.push_back(Elt.getZExtValue());
5840
5841 return true;
5842}
5843
5844/// Create a shuffle mask that matches the PACKSS/PACKUS truncation.
5845/// Note: This ignores saturation, so inputs must be checked first.
5846static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
5847 bool Unary) {
5848 assert(Mask.empty() && "Expected an empty shuffle mask vector")((Mask.empty() && "Expected an empty shuffle mask vector"
) ? static_cast<void> (0) : __assert_fail ("Mask.empty() && \"Expected an empty shuffle mask vector\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5848, __PRETTY_FUNCTION__))
;
5849 unsigned NumElts = VT.getVectorNumElements();
5850 unsigned NumLanes = VT.getSizeInBits() / 128;
5851 unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits();
5852 unsigned Offset = Unary ? 0 : NumElts;
5853
5854 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
5855 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5856 Mask.push_back(Elt + (Lane * NumEltsPerLane));
5857 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5858 Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset);
5859 }
5860}
5861
5862/// Calculates the shuffle mask corresponding to the target-specific opcode.
5863/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
5864/// operands in \p Ops, and returns true.
5865/// Sets \p IsUnary to true if only one source is used. Note that this will set
5866/// IsUnary for shuffles which use a single input multiple times, and in those
5867/// cases it will adjust the mask to only have indices within that single input.
5868/// It is an error to call this with non-empty Mask/Ops vectors.
5869static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
5870 SmallVectorImpl<SDValue> &Ops,
5871 SmallVectorImpl<int> &Mask, bool &IsUnary) {
5872 unsigned NumElems = VT.getVectorNumElements();
5873 unsigned MaskEltSize = VT.getScalarSizeInBits();
5874 SmallVector<uint64_t, 32> RawMask;
5875 APInt RawUndefs;
5876 SDValue ImmN;
5877
5878 assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector")((Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"
) ? static_cast<void> (0) : __assert_fail ("Mask.empty() && \"getTargetShuffleMask expects an empty Mask vector\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5878, __PRETTY_FUNCTION__))
;
5879 assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector")((Ops.empty() && "getTargetShuffleMask expects an empty Ops vector"
) ? static_cast<void> (0) : __assert_fail ("Ops.empty() && \"getTargetShuffleMask expects an empty Ops vector\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5879, __PRETTY_FUNCTION__))
;
5880
5881 IsUnary = false;
5882 bool IsFakeUnary = false;
5883 switch (N->getOpcode()) {
5884 case X86ISD::BLENDI:
5885 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5885, __PRETTY_FUNCTION__))
;
5886 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5886, __PRETTY_FUNCTION__))
;
5887 ImmN = N->getOperand(N->getNumOperands() - 1);
5888 DecodeBLENDMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5889 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5890 break;
5891 case X86ISD::SHUFP:
5892 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5892, __PRETTY_FUNCTION__))
;
5893 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5893, __PRETTY_FUNCTION__))
;
5894 ImmN = N->getOperand(N->getNumOperands() - 1);
5895 DecodeSHUFPMask(NumElems, MaskEltSize,
5896 cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5897 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5898 break;
5899 case X86ISD::INSERTPS:
5900 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5900, __PRETTY_FUNCTION__))
;
5901 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/X86/X86ISelLowering.cpp"
, 5901, __PRETTY_FUNCTION__))
;
5902 ImmN = N->getOperand(N->getNumOperands() - 1);
5903 DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5904 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5905 break;