Bug Summary

File: lib/Target/X86/X86ISelLowering.cpp
Warning: line 125, column 3
Potential memory leak

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn325874/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86 -I /build/llvm-toolchain-snapshot-7~svn325874/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn325874/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem 
/usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn325874/build-llvm/lib/Target/X86 -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-02-23-163436-368-1 -x c++ /build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp

/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86ISelLowering.h"
16#include "Utils/X86ShuffleDecode.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86ShuffleDecodeConstantPool.h"
23#include "X86TargetMachine.h"
24#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallBitVector.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/CodeGen/IntrinsicLowering.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineJumpTableInfo.h"
36#include "llvm/CodeGen/MachineModuleInfo.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/WinEHFuncInfo.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/DiagnosticInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalAlias.h"
47#include "llvm/IR/GlobalVariable.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Intrinsics.h"
50#include "llvm/MC/MCAsmInfo.h"
51#include "llvm/MC/MCContext.h"
52#include "llvm/MC/MCExpr.h"
53#include "llvm/MC/MCSymbol.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/MathExtras.h"
59#include "llvm/Target/TargetOptions.h"
60#include <algorithm>
61#include <bitset>
62#include <cctype>
63#include <numeric>
64using namespace llvm;
65
66#define DEBUG_TYPE "x86-isel"
67
68STATISTIC(NumTailCalls, "Number of tail calls");
// Macro expansion shown by the analyzer for the line above:
//   static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls", "Number of tail calls", {0}, {false}};
69
// Boolean command-line option "x86-experimental-vector-widening-legalization",
// initialised to false. Per its description string it enables an experimental
// vector type legalization that widens rather than promotes.
// NOTE(review): cl::Hidden presumably keeps the flag out of the default help
// listing -- confirm against the LLVM CommandLine documentation.
70static cl::opt<bool> ExperimentalVectorWideningLegalization(
71 "x86-experimental-vector-widening-legalization", cl::init(false),
72 cl::desc("Enable an experimental vector type legalization through widening "
73 "rather than promotion."),
74 cl::Hidden);
75
// Integer command-line option "x86-experimental-pref-loop-alignment",
// initialised to 4. Per its description string the value is a bit count: that
// many low bits of the loop header PC will be zero, so it reads as a log2
// alignment rather than a byte count.
// NOTE(review): where this value is consumed is not visible in this excerpt.
76static cl::opt<int> ExperimentalPrefLoopAlignment(
77 "x86-experimental-pref-loop-alignment", cl::init(4),
78 cl::desc("Sets the preferable loop alignment for experiments "
79 "(the last x86-experimental-pref-loop-alignment bits"
80 " of the loop header PC will be 0)."),
81 cl::Hidden);
82
// Boolean command-line option "mul-constant-optimization", initialised to
// true. Per its description string it controls replacing 'mul x, Const' with
// cheaper instruction sequences such as SHIFT and LEA.
// NOTE(review): the code that reads this flag is outside this excerpt.
83static cl::opt<bool> MulConstantOptimization(
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden);
88
89/// Call this when the user attempts to do something unsupported, like
90/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
91/// report_fatal_error, so calling code should attempt to recover without
92/// crashing.
///
/// \param DAG  Selection DAG; supplies both the MachineFunction whose function
///             is named in the diagnostic and the context that receives it.
/// \param dl   Source-order location; its debug location (dl.getDebugLoc()) is
///             attached to the diagnostic.
/// \param Msg  Message text forwarded to DiagnosticInfoUnsupported.
93static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
94 const char *Msg) {
95 MachineFunction &MF = DAG.getMachineFunction();
// Hand a DiagnosticInfoUnsupported to the context's diagnose() instead of
// aborting; this function returns normally afterwards.
96 DAG.getContext()->diagnose(
97 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
98}
99
100X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
101 const X86Subtarget &STI)
102 : TargetLowering(TM), Subtarget(STI) {
103 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
104 X86ScalarSSEf64 = Subtarget.hasSSE2();
105 X86ScalarSSEf32 = Subtarget.hasSSE1();
106 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
107
108 // Set up the TargetLowering object.
109
110 // X86 is weird. It always uses i8 for shift amounts and setcc results.
111 setBooleanContents(ZeroOrOneBooleanContent);
112 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
113 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
114
115 // For 64-bit, since we have so many registers, use the ILP scheduler.
116 // For 32-bit, use the register pressure specific scheduling.
117 // For Atom, always use ILP scheduling.
118 if (Subtarget.isAtom())
119 setSchedulingPreference(Sched::ILP);
120 else if (Subtarget.is64Bit())
121 setSchedulingPreference(Sched::ILP);
122 else
123 setSchedulingPreference(Sched::RegPressure);
124 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
125 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
126
127 // Bypass expensive divides and use cheaper ones.
128 if (TM.getOptLevel() >= CodeGenOpt::Default) {
129 if (Subtarget.hasSlowDivide32())
130 addBypassSlowDiv(32, 8);
131 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
132 addBypassSlowDiv(64, 32);
133 }
134
135 if (Subtarget.isTargetKnownWindowsMSVC() ||
136 Subtarget.isTargetWindowsItanium()) {
137 // Setup Windows compiler runtime calls.
138 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
139 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
140 setLibcallName(RTLIB::SREM_I64, "_allrem");
141 setLibcallName(RTLIB::UREM_I64, "_aullrem");
142 setLibcallName(RTLIB::MUL_I64, "_allmul");
143 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
144 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
145 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
146 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
147 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
148 }
149
150 if (Subtarget.isTargetDarwin()) {
151 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
152 setUseUnderscoreSetJmp(false);
153 setUseUnderscoreLongJmp(false);
154 } else if (Subtarget.isTargetWindowsGNU()) {
155 // MS runtime is weird: it exports _setjmp, but longjmp!
156 setUseUnderscoreSetJmp(true);
157 setUseUnderscoreLongJmp(false);
158 } else {
159 setUseUnderscoreSetJmp(true);
160 setUseUnderscoreLongJmp(true);
161 }
162
163 // Set up the register classes.
164 addRegisterClass(MVT::i8, &X86::GR8RegClass);
165 addRegisterClass(MVT::i16, &X86::GR16RegClass);
166 addRegisterClass(MVT::i32, &X86::GR32RegClass);
167 if (Subtarget.is64Bit())
168 addRegisterClass(MVT::i64, &X86::GR64RegClass);
169
170 for (MVT VT : MVT::integer_valuetypes())
171 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
172
173 // We don't accept any truncstore of integer registers.
174 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
175 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
176 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
177 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
178 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
179 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
180
181 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
182
183 // SETOEQ and SETUNE require checking two conditions.
184 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
185 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
186 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
187 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
188 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
189 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
190
191 // Integer absolute.
192 if (Subtarget.hasCMov()) {
193 setOperationAction(ISD::ABS , MVT::i16 , Custom);
194 setOperationAction(ISD::ABS , MVT::i32 , Custom);
195 if (Subtarget.is64Bit())
196 setOperationAction(ISD::ABS , MVT::i64 , Custom);
197 }
198
199 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
200 // operation.
201 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
202 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
203 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
204
205 if (Subtarget.is64Bit()) {
206 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
207 // f32/f64 are legal, f80 is custom.
208 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
209 else
210 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
211 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
212 } else if (!Subtarget.useSoftFloat()) {
213 // We have an algorithm for SSE2->double, and we turn this into a
214 // 64-bit FILD followed by conditional FADD for other targets.
215 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
216 // We have an algorithm for SSE2, and we turn this into a 64-bit
217 // FILD or VCVTUSI2SS/SD for other targets.
218 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
219 }
220
221 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
222 // this operation.
223 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
224 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
225
226 if (!Subtarget.useSoftFloat()) {
227 // SSE has no i16 to fp conversion, only i32.
228 if (X86ScalarSSEf32) {
229 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
230 // f32 and f64 cases are Legal, f80 case is not
231 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
232 } else {
233 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
234 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
235 }
236 } else {
237 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
238 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
239 }
240
241 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
242 // this operation.
243 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
244 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
245
246 if (!Subtarget.useSoftFloat()) {
247 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
248 // are Legal, f80 is custom lowered.
249 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
250 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
251
252 if (X86ScalarSSEf32) {
253 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
254 // f32 and f64 cases are Legal, f80 case is not
255 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
256 } else {
257 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
258 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
259 }
260 } else {
261 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
262 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
263 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
264 }
265
266 // Handle FP_TO_UINT by promoting the destination to a larger signed
267 // conversion.
268 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
269 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
270 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
271
272 if (Subtarget.is64Bit()) {
273 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
274 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
275 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
276 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
277 } else {
278 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
279 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
280 }
281 } else if (!Subtarget.useSoftFloat()) {
282 // Since AVX is a superset of SSE3, only check for SSE here.
283 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
284 // Expand FP_TO_UINT into a select.
285 // FIXME: We would like to use a Custom expander here eventually to do
286 // the optimal thing for SSE vs. the default expansion in the legalizer.
287 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
288 else
289 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
290 // With SSE3 we can use fisttpll to convert to a signed i64; without
291 // SSE, we're stuck with a fistpll.
292 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
293
294 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
295 }
296
297 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
298 if (!X86ScalarSSEf64) {
299 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
300 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
301 if (Subtarget.is64Bit()) {
302 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
303 // Without SSE, i64->f64 goes through memory.
304 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
305 }
306 } else if (!Subtarget.is64Bit())
307 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
308
309 // Scalar integer divide and remainder are lowered to use operations that
310 // produce two results, to match the available instructions. This exposes
311 // the two-result form to trivial CSE, which is able to combine x/y and x%y
312 // into a single instruction.
313 //
314 // Scalar integer multiply-high is also lowered to use two-result
315 // operations, to match the available instructions. However, plain multiply
316 // (low) operations are left as Legal, as there are single-result
317 // instructions for this in x86. Using the two-result multiply instructions
318 // when both high and low results are needed must be arranged by dagcombine.
319 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
320 setOperationAction(ISD::MULHS, VT, Expand);
321 setOperationAction(ISD::MULHU, VT, Expand);
322 setOperationAction(ISD::SDIV, VT, Expand);
323 setOperationAction(ISD::UDIV, VT, Expand);
324 setOperationAction(ISD::SREM, VT, Expand);
325 setOperationAction(ISD::UREM, VT, Expand);
326 }
327
328 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
329 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
330 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
331 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
332 setOperationAction(ISD::BR_CC, VT, Expand);
333 setOperationAction(ISD::SELECT_CC, VT, Expand);
334 }
335 if (Subtarget.is64Bit())
336 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
337 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
338 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
339 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
340 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
341
342 setOperationAction(ISD::FREM , MVT::f32 , Expand);
343 setOperationAction(ISD::FREM , MVT::f64 , Expand);
344 setOperationAction(ISD::FREM , MVT::f80 , Expand);
345 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
346
347 // Promote the i8 variants and force them on up to i32 which has a shorter
348 // encoding.
349 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
350 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
351 if (!Subtarget.hasBMI()) {
352 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
353 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
354 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
355 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
356 if (Subtarget.is64Bit()) {
357 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
358 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
359 }
360 }
361
362 if (Subtarget.hasLZCNT()) {
363 // When promoting the i8 variants, force them to i32 for a shorter
364 // encoding.
365 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
366 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
367 } else {
368 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
369 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
370 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
371 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
372 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
373 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
374 if (Subtarget.is64Bit()) {
375 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
376 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
377 }
378 }
379
380 // Special handling for half-precision floating point conversions.
381 // If we don't have F16C support, then lower half float conversions
382 // into library calls.
383 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
384 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
385 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
386 }
387
388 // There's never any support for operations beyond MVT::f32.
389 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
390 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
391 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
392 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
393
394 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
395 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
396 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
397 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
398 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
399 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
400
401 if (Subtarget.hasPOPCNT()) {
402 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
403 } else {
404 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
405 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
406 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
407 if (Subtarget.is64Bit())
408 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
409 }
410
411 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
412
413 if (!Subtarget.hasMOVBE())
414 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
415
416 // These should be promoted to a larger select which is supported.
417 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
418 // X86 wants to expand cmov itself.
419 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
420 setOperationAction(ISD::SELECT, VT, Custom);
421 setOperationAction(ISD::SETCC, VT, Custom);
422 }
423 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
424 if (VT == MVT::i64 && !Subtarget.is64Bit())
425 continue;
426 setOperationAction(ISD::SELECT, VT, Custom);
427 setOperationAction(ISD::SETCC, VT, Custom);
428 }
429
430 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
431 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
432 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
433
434 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
435 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
436 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
437 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
438 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
439 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
440 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
441 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
442
443 // Darwin ABI issue.
444 for (auto VT : { MVT::i32, MVT::i64 }) {
445 if (VT == MVT::i64 && !Subtarget.is64Bit())
446 continue;
447 setOperationAction(ISD::ConstantPool , VT, Custom);
448 setOperationAction(ISD::JumpTable , VT, Custom);
449 setOperationAction(ISD::GlobalAddress , VT, Custom);
450 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
451 setOperationAction(ISD::ExternalSymbol , VT, Custom);
452 setOperationAction(ISD::BlockAddress , VT, Custom);
453 }
454
455 // 64-bit shl, sra, srl (iff 32-bit x86)
456 for (auto VT : { MVT::i32, MVT::i64 }) {
457 if (VT == MVT::i64 && !Subtarget.is64Bit())
458 continue;
459 setOperationAction(ISD::SHL_PARTS, VT, Custom);
460 setOperationAction(ISD::SRA_PARTS, VT, Custom);
461 setOperationAction(ISD::SRL_PARTS, VT, Custom);
462 }
463
464 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
465 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
466
467 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
468
469 // Expand certain atomics
470 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
471 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
472 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
473 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
474 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
477 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
478 }
479
480 if (Subtarget.hasCmpxchg16b()) {
481 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
482 }
483
484 // FIXME - use subtarget debug flags
485 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
486 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
487 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
488 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
489 }
490
491 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
492 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
493
494 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
495 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
496
497 setOperationAction(ISD::TRAP, MVT::Other, Legal);
498 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
499
500 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
501 setOperationAction(ISD::VASTART , MVT::Other, Custom);
502 setOperationAction(ISD::VAEND , MVT::Other, Expand);
503 bool Is64Bit = Subtarget.is64Bit();
504 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
505 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
506
507 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
508 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
509
510 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
511
512 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
513 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
514 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
515
516 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
517 // f32 and f64 use SSE.
518 // Set up the FP register classes.
519 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
520 : &X86::FR32RegClass);
521 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
522 : &X86::FR64RegClass);
523
524 for (auto VT : { MVT::f32, MVT::f64 }) {
525 // Use ANDPD to simulate FABS.
526 setOperationAction(ISD::FABS, VT, Custom);
527
528 // Use XORP to simulate FNEG.
529 setOperationAction(ISD::FNEG, VT, Custom);
530
531 // Use ANDPD and ORPD to simulate FCOPYSIGN.
532 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
533
534 // We don't support sin/cos/fmod
535 setOperationAction(ISD::FSIN , VT, Expand);
536 setOperationAction(ISD::FCOS , VT, Expand);
537 setOperationAction(ISD::FSINCOS, VT, Expand);
538 }
539
540 // Lower this to MOVMSK plus an AND.
541 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
542 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
543
544 // Expand FP immediates into loads from the stack, except for the special
545 // cases we handle.
546 addLegalFPImmediate(APFloat(+0.0)); // xorpd
547 addLegalFPImmediate(APFloat(+0.0f)); // xorps
548 } else if (UseX87 && X86ScalarSSEf32) {
549 // Use SSE for f32, x87 for f64.
550 // Set up the FP register classes.
551 addRegisterClass(MVT::f32, &X86::FR32RegClass);
552 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
553
554 // Use ANDPS to simulate FABS.
555 setOperationAction(ISD::FABS , MVT::f32, Custom);
556
557 // Use XORP to simulate FNEG.
558 setOperationAction(ISD::FNEG , MVT::f32, Custom);
559
560 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
561
562 // Use ANDPS and ORPS to simulate FCOPYSIGN.
563 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
564 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
565
566 // We don't support sin/cos/fmod
567 setOperationAction(ISD::FSIN , MVT::f32, Expand);
568 setOperationAction(ISD::FCOS , MVT::f32, Expand);
569 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
570
571 // Special cases we handle for FP constants.
572 addLegalFPImmediate(APFloat(+0.0f)); // xorps
573 addLegalFPImmediate(APFloat(+0.0)); // FLD0
574 addLegalFPImmediate(APFloat(+1.0)); // FLD1
575 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
576 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
577
578 // Always expand sin/cos functions even though x87 has an instruction.
579 setOperationAction(ISD::FSIN , MVT::f64, Expand);
580 setOperationAction(ISD::FCOS , MVT::f64, Expand);
581 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
582 } else if (UseX87) {
583 // f32 and f64 in x87.
584 // Set up the FP register classes.
585 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
586 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
587
588 for (auto VT : { MVT::f32, MVT::f64 }) {
589 setOperationAction(ISD::UNDEF, VT, Expand);
590 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
591
592 // Always expand sin/cos functions even though x87 has an instruction.
593 setOperationAction(ISD::FSIN , VT, Expand);
594 setOperationAction(ISD::FCOS , VT, Expand);
595 setOperationAction(ISD::FSINCOS, VT, Expand);
596 }
597 addLegalFPImmediate(APFloat(+0.0)); // FLD0
598 addLegalFPImmediate(APFloat(+1.0)); // FLD1
599 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
600 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
601 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
602 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
603 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
604 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
605 }
606
607 // We don't support FMA.
608 setOperationAction(ISD::FMA, MVT::f64, Expand);
609 setOperationAction(ISD::FMA, MVT::f32, Expand);
610
611 // Long double always uses X87, except f128 in MMX.
612 if (UseX87) {
613 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
614 addRegisterClass(MVT::f128, &X86::FR128RegClass);
615 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
616 setOperationAction(ISD::FABS , MVT::f128, Custom);
617 setOperationAction(ISD::FNEG , MVT::f128, Custom);
618 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
619 }
620
621 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
622 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
623 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
624 {
625 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
626 addLegalFPImmediate(TmpFlt); // FLD0
627 TmpFlt.changeSign();
628 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
629
630 bool ignored;
631 APFloat TmpFlt2(+1.0);
632 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
633 &ignored);
634 addLegalFPImmediate(TmpFlt2); // FLD1
635 TmpFlt2.changeSign();
636 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
637 }
638
639 // Always expand sin/cos functions even though x87 has an instruction.
640 setOperationAction(ISD::FSIN , MVT::f80, Expand);
641 setOperationAction(ISD::FCOS , MVT::f80, Expand);
642 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
643
644 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
645 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
646 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
647 setOperationAction(ISD::FRINT, MVT::f80, Expand);
648 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
649 setOperationAction(ISD::FMA, MVT::f80, Expand);
650 }
651
652 // Always use a library call for pow.
653 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
654 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
655 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
656
657 setOperationAction(ISD::FLOG, MVT::f80, Expand);
658 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
659 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
660 setOperationAction(ISD::FEXP, MVT::f80, Expand);
661 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
662 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
663 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
664
665 // Some FP actions are always expanded for vector types.
666 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
667 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
668 setOperationAction(ISD::FSIN, VT, Expand);
669 setOperationAction(ISD::FSINCOS, VT, Expand);
670 setOperationAction(ISD::FCOS, VT, Expand);
671 setOperationAction(ISD::FREM, VT, Expand);
672 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
673 setOperationAction(ISD::FPOW, VT, Expand);
674 setOperationAction(ISD::FLOG, VT, Expand);
675 setOperationAction(ISD::FLOG2, VT, Expand);
676 setOperationAction(ISD::FLOG10, VT, Expand);
677 setOperationAction(ISD::FEXP, VT, Expand);
678 setOperationAction(ISD::FEXP2, VT, Expand);
679 }
680
681 // First set operation action for all vector types to either promote
682 // (for widening) or expand (for scalarization). Then we will selectively
683 // turn on ones that can be effectively codegen'd.
684 for (MVT VT : MVT::vector_valuetypes()) {
685 setOperationAction(ISD::SDIV, VT, Expand);
686 setOperationAction(ISD::UDIV, VT, Expand);
687 setOperationAction(ISD::SREM, VT, Expand);
688 setOperationAction(ISD::UREM, VT, Expand);
689 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
690 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
691 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
692 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
693 setOperationAction(ISD::FMA, VT, Expand);
694 setOperationAction(ISD::FFLOOR, VT, Expand);
695 setOperationAction(ISD::FCEIL, VT, Expand);
696 setOperationAction(ISD::FTRUNC, VT, Expand);
697 setOperationAction(ISD::FRINT, VT, Expand);
698 setOperationAction(ISD::FNEARBYINT, VT, Expand);
699 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
700 setOperationAction(ISD::MULHS, VT, Expand);
701 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
702 setOperationAction(ISD::MULHU, VT, Expand);
703 setOperationAction(ISD::SDIVREM, VT, Expand);
704 setOperationAction(ISD::UDIVREM, VT, Expand);
705 setOperationAction(ISD::CTPOP, VT, Expand);
706 setOperationAction(ISD::CTTZ, VT, Expand);
707 setOperationAction(ISD::CTLZ, VT, Expand);
708 setOperationAction(ISD::ROTL, VT, Expand);
709 setOperationAction(ISD::ROTR, VT, Expand);
710 setOperationAction(ISD::BSWAP, VT, Expand);
711 setOperationAction(ISD::SETCC, VT, Expand);
712 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
713 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
714 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
715 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
716 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
717 setOperationAction(ISD::TRUNCATE, VT, Expand);
718 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
719 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
720 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
721 setOperationAction(ISD::SELECT_CC, VT, Expand);
722 for (MVT InnerVT : MVT::vector_valuetypes()) {
723 setTruncStoreAction(InnerVT, VT, Expand);
724
725 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
726 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
727
728 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
729 // types, we have to deal with them whether we ask for Expansion or not.
730 // Setting Expand causes its own optimisation problems though, so leave
731 // them legal.
732 if (VT.getVectorElementType() == MVT::i1)
733 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
734
735 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
736 // split/scalarized right now.
737 if (VT.getVectorElementType() == MVT::f16)
738 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
739 }
740 }
741
742 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
743 // with -msoft-float, disable use of MMX as well.
744 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
745 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
746 // No operations on x86mmx supported, everything uses intrinsics.
747 }
748
749 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
750 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
751 : &X86::VR128RegClass);
752
753 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
754 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
755 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
756 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
757 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
758 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
759 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
760 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
761 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
762 }
763
764 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
765 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
766 : &X86::VR128RegClass);
767
768 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
769 // registers cannot be used even for integer operations.
770 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
771 : &X86::VR128RegClass);
772 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
773 : &X86::VR128RegClass);
774 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
775 : &X86::VR128RegClass);
776 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
777 : &X86::VR128RegClass);
778
779 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
780 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
781 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
782 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
783 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
784 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
785 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
786 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
787 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
788 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
789 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
790 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
791 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
792
793 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
794 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
795 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
796 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
797 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
798 }
799
800 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
801 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
802 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
803
804 // Provide custom widening for v2f32 setcc. This is really for VLX when
805 // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
806 // type legalization changing the result type to v4i1 during widening.
807 // It works fine for SSE2 and is probably faster so no need to qualify with
808 // VLX support.
809 setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
810
811 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
812 setOperationAction(ISD::SETCC, VT, Custom);
813 setOperationAction(ISD::CTPOP, VT, Custom);
814 setOperationAction(ISD::CTTZ, VT, Custom);
815 }
816
817 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
818 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
819 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
820 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
821 setOperationAction(ISD::VSELECT, VT, Custom);
822 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
823 }
824
825 // We support custom legalizing of sext and anyext loads for specific
826 // memory vector types which we can load as a scalar (or sequence of
827 // scalars) and extend in-register to a legal 128-bit vector type. For sext
828 // loads these must work with a single scalar load.
829 for (MVT VT : MVT::integer_vector_valuetypes()) {
830 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
831 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
832 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
833 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
834 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
835 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
836 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
837 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
838 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
839 }
840
841 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
842 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
843 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
844 setOperationAction(ISD::VSELECT, VT, Custom);
845
846 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
847 continue;
848
849 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
850 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
851 }
852
853 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
854 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
855 setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
856 setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
857 setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
858 setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
859 setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
860 }
861
862 // Custom lower v2i64 and v2f64 selects.
863 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
864 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
865
866 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
867 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
868
869 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
870 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
871
872 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
873
874 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
875 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
876
877 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
878 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
879
880 for (MVT VT : MVT::fp_vector_valuetypes())
881 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
882
883 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
884 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
885 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
886
887 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
888 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
889 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
890
891 // In the customized shift lowering, the legal v4i32/v2i64 cases
892 // in AVX2 will be recognized.
893 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
894 setOperationAction(ISD::SRL, VT, Custom);
895 setOperationAction(ISD::SHL, VT, Custom);
896 setOperationAction(ISD::SRA, VT, Custom);
897 }
898 }
899
900 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
901 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
902 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
903 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
904 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
905 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
906 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
907 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
908 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
909 }
910
911 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
912 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
913 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
914 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
915 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
916 setOperationAction(ISD::FRINT, RoundedTy, Legal);
917 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
918 }
919
920 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
921 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
922 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
923 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
924 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
925 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
926 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
927 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
928
929 // FIXME: Do we need to handle scalar-to-vector here?
930 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
931
932 // We directly match byte blends in the backend as they match the VSELECT
933 // condition form.
934 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
935
936 // SSE41 brings specific instructions for doing vector sign extend even in
937 // cases where we don't have SRA.
938 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
939 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
940 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
941 }
942
943 for (MVT VT : MVT::integer_vector_valuetypes()) {
944 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
945 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
946 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
947 }
948
949 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
950 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
951 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
952 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
953 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
954 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
955 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
956 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
957 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
958 }
959
960 // i8 vectors are custom because the source register and source
961 // memory operand types are not the same width.
962 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
963 }
964
965 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
966 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
967 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
968 setOperationAction(ISD::ROTL, VT, Custom);
969
970 // XOP can efficiently perform BITREVERSE with VPPERM.
971 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
972 setOperationAction(ISD::BITREVERSE, VT, Custom);
973
974 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
975 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
976 setOperationAction(ISD::BITREVERSE, VT, Custom);
977 }
978
979 if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
980 bool HasInt256 = Subtarget.hasInt256();
981
982 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
983 : &X86::VR256RegClass);
984 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
985 : &X86::VR256RegClass);
986 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
987 : &X86::VR256RegClass);
988 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
989 : &X86::VR256RegClass);
990 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
991 : &X86::VR256RegClass);
992 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
993 : &X86::VR256RegClass);
994
995 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
996 setOperationAction(ISD::FFLOOR, VT, Legal);
997 setOperationAction(ISD::FCEIL, VT, Legal);
998 setOperationAction(ISD::FTRUNC, VT, Legal);
999 setOperationAction(ISD::FRINT, VT, Legal);
1000 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1001 setOperationAction(ISD::FNEG, VT, Custom);
1002 setOperationAction(ISD::FABS, VT, Custom);
1003 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1004 }
1005
1006 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1007 // even though v8i16 is a legal type.
1008 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1009 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1010 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1011
1012 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1013 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1014
1015 for (MVT VT : MVT::fp_vector_valuetypes())
1016 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1017
1018 // In the customized shift lowering, the legal v8i32/v4i64 cases
1019 // in AVX2 will be recognized.
1020 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1021 setOperationAction(ISD::SRL, VT, Custom);
1022 setOperationAction(ISD::SHL, VT, Custom);
1023 setOperationAction(ISD::SRA, VT, Custom);
1024 }
1025
1026 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1027 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1028 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1029
1030 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1031 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1032 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1033 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1034 }
1035
1036 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1037 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1038 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1039 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1040
1041 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1042 setOperationAction(ISD::SETCC, VT, Custom);
1043 setOperationAction(ISD::CTPOP, VT, Custom);
1044 setOperationAction(ISD::CTTZ, VT, Custom);
1045 setOperationAction(ISD::CTLZ, VT, Custom);
1046 }
1047
1048 if (Subtarget.hasAnyFMA()) {
1049 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1050 MVT::v2f64, MVT::v4f64 })
1051 setOperationAction(ISD::FMA, VT, Legal);
1052 }
1053
1054 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1055 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1056 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1057 }
1058
1059 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1060 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1061 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1062 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1063
1064 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1065 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1066
1067 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1068 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1069 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1070 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1071
1072 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1073 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1074 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1075 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1076
1077 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1078 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1079 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1080 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1081 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1082 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1083 }
1084
1085 if (HasInt256) {
1086 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
1087 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
1088 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
1089
1090 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1091 // when we have a 256bit-wide blend with immediate.
1092 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1093
1094 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1095 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1096 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1097 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1098 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1099 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1100 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1101 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1102 }
1103 }
1104
1105 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1106 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1107 setOperationAction(ISD::MLOAD, VT, Legal);
1108 setOperationAction(ISD::MSTORE, VT, Legal);
1109 }
1110
1111 // Extract subvector is special because the value type
1112 // (result) is 128-bit but the source is 256-bit wide.
1113 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1114 MVT::v4f32, MVT::v2f64 }) {
1115 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1116 }
1117
1118 // Custom lower several nodes for 256-bit types.
1119 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1120 MVT::v8f32, MVT::v4f64 }) {
1121 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1122 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1123 setOperationAction(ISD::VSELECT, VT, Custom);
1124 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1125 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1126 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1127 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1128 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1129 }
1130
1131 if (HasInt256)
1132 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1133
1134 // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
1135 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1136 setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
1137 setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
1138 setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
1139 setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
1140 setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
1141 }
1142
1143 if (HasInt256) {
1144 // Custom legalize 2x32 to get a little better code.
1145 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1146 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1147
1148 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1149 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1150 setOperationAction(ISD::MGATHER, VT, Custom);
1151 }
1152 }
1153
1154 // This block controls legalization of the mask vector sizes that are
1155 // available with AVX512. 512-bit vectors are in a separate block controlled
1156 // by useAVX512Regs.
1157 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1158 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1159 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1160 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1161 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1162 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1163
1164 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1165 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1166 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1167
1168 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1169 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1170 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1171 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1172 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1173 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1174
1175 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1176 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1177 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1178 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1179 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1180 }
1181
1182 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1183 setOperationAction(ISD::ADD, VT, Custom);
1184 setOperationAction(ISD::SUB, VT, Custom);
1185 setOperationAction(ISD::MUL, VT, Custom);
1186 setOperationAction(ISD::SETCC, VT, Custom);
1187 setOperationAction(ISD::SELECT, VT, Custom);
1188 setOperationAction(ISD::TRUNCATE, VT, Custom);
1189
1190 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1191 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1192 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1193 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1194 setOperationAction(ISD::VSELECT, VT, Expand);
1195 }
1196
1197 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1198 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1199 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1200 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
1201 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1202 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1203 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1204 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1205 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1206 }
1207
1208 // This block controls legalization for 512-bit operations with 32/64 bit
1209 // elements. 512-bits can be disabled based on prefer-vector-width and
1210 // required-vector-width function attributes.
1211 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1212 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1213 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1214 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1215 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1216
1217 for (MVT VT : MVT::fp_vector_valuetypes())
1218 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1219
1220 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1221 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1222 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1223 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1224 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1225 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1226 }
1227
1228 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1229 setOperationAction(ISD::FNEG, VT, Custom);
1230 setOperationAction(ISD::FABS, VT, Custom);
1231 setOperationAction(ISD::FMA, VT, Legal);
1232 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1233 }
1234
1235 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1236 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
1237 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
1238 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
1239 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1240 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
1241 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
1242 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
1243 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1244 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1245
1246 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1247 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1248 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1249 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1250 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1251
1252 if (!Subtarget.hasVLX()) {
1253 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1254 // to 512-bit rather than use the AVX2 instructions so that we can use
1255 // k-masks.
1256 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1257 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1258 setOperationAction(ISD::MLOAD, VT, Custom);
1259 setOperationAction(ISD::MSTORE, VT, Custom);
1260 }
1261 }
1262
1263 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1264 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1265 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1266 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1267 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1268 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1269 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1270 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1271
1272 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1273 setOperationAction(ISD::FFLOOR, VT, Legal);
1274 setOperationAction(ISD::FCEIL, VT, Legal);
1275 setOperationAction(ISD::FTRUNC, VT, Legal);
1276 setOperationAction(ISD::FRINT, VT, Legal);
1277 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1278 }
1279
1280 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
1281 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
1282
1283 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1284 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1285 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1286
1287 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1288 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1289 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1290 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1291
1292 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1293 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1294
1295 setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom);
1296 setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom);
1297
1298 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1299 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1300 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1301
1302 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1303 setOperationAction(ISD::SMAX, VT, Legal);
1304 setOperationAction(ISD::UMAX, VT, Legal);
1305 setOperationAction(ISD::SMIN, VT, Legal);
1306 setOperationAction(ISD::UMIN, VT, Legal);
1307 setOperationAction(ISD::ABS, VT, Legal);
1308 setOperationAction(ISD::SRL, VT, Custom);
1309 setOperationAction(ISD::SHL, VT, Custom);
1310 setOperationAction(ISD::SRA, VT, Custom);
1311 setOperationAction(ISD::CTPOP, VT, Custom);
1312 setOperationAction(ISD::CTTZ, VT, Custom);
1313 setOperationAction(ISD::ROTL, VT, Custom);
1314 setOperationAction(ISD::ROTR, VT, Custom);
1315 }
1316
1317 // Need to promote to 64-bit even though we have 32-bit masked instructions
1318 // because the IR optimizers rearrange bitcasts around logic ops leaving
1319 // too many variations to handle if we don't promote them.
1320 setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
1321 setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
1322 setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
1323
1324 if (Subtarget.hasDQI()) {
1325 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1326 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1327 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1328 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1329 }
1330
1331 if (Subtarget.hasCDI()) {
1332 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1333 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1334 setOperationAction(ISD::CTLZ, VT, Legal);
1335 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
1336 }
1337 } // Subtarget.hasCDI()
1338
1339 if (Subtarget.hasVPOPCNTDQ()) {
1340 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1341 setOperationAction(ISD::CTPOP, VT, Legal);
1342 }
1343
1344 // Extract subvector is special because the value type
1345 // (result) is 256-bit but the source is 512-bit wide.
1346 // 128-bit was made Legal under AVX1.
1347 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1348 MVT::v8f32, MVT::v4f64 })
1349 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1350
1351 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1352 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1353 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1354 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1355 setOperationAction(ISD::VSELECT, VT, Custom);
1356 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1357 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1358 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1359 setOperationAction(ISD::MLOAD, VT, Legal);
1360 setOperationAction(ISD::MSTORE, VT, Legal);
1361 setOperationAction(ISD::MGATHER, VT, Custom);
1362 setOperationAction(ISD::MSCATTER, VT, Custom);
1363 }
1364 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1365 setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
1366 setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
1367 }
1368 }// has AVX-512
1369
1370 // This block controls legalization for operations that don't have
1371 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1372 // narrower widths.
1373 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1374 // These operations are handled on non-VLX by artificially widening in
1375 // isel patterns.
1376 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1377
1378 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1379 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1380 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1381 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1382 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1383
1384 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1385 setOperationAction(ISD::SMAX, VT, Legal);
1386 setOperationAction(ISD::UMAX, VT, Legal);
1387 setOperationAction(ISD::SMIN, VT, Legal);
1388 setOperationAction(ISD::UMIN, VT, Legal);
1389 setOperationAction(ISD::ABS, VT, Legal);
1390 }
1391
1392 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1393 setOperationAction(ISD::ROTL, VT, Custom);
1394 setOperationAction(ISD::ROTR, VT, Custom);
1395 }
1396
1397 // Custom legalize 2x32 to get a little better code.
1398 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1399 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1400
1401 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1402 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1403 setOperationAction(ISD::MSCATTER, VT, Custom);
1404
1405 if (Subtarget.hasDQI()) {
1406 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1407 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1408 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1409 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1410 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1411 }
1412 }
1413
1414 if (Subtarget.hasCDI()) {
1415 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1416 setOperationAction(ISD::CTLZ, VT, Legal);
1417 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
1418 }
1419 } // Subtarget.hasCDI()
1420
1421 if (Subtarget.hasVPOPCNTDQ()) {
1422 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1423 setOperationAction(ISD::CTPOP, VT, Legal);
1424 }
1425 }
1426
1427 // This block controls legalization of v32i1/v64i1 which are available with
1428 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1429 // useBWIRegs.
1430 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1431 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1432 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1433
1434 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1435 setOperationAction(ISD::ADD, VT, Custom);
1436 setOperationAction(ISD::SUB, VT, Custom);
1437 setOperationAction(ISD::MUL, VT, Custom);
1438 setOperationAction(ISD::VSELECT, VT, Expand);
1439
1440 setOperationAction(ISD::TRUNCATE, VT, Custom);
1441 setOperationAction(ISD::SETCC, VT, Custom);
1442 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1443 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1444 setOperationAction(ISD::SELECT, VT, Custom);
1445 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1446 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1447 }
1448
1449 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1450 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1451 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1452 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1453 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1454 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1455
1456 // Extends from v32i1 masks to 256-bit vectors.
1457 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1458 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1459 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1460 }
1461
1462 // This block controls legalization for v32i16 and v64i8. 512-bits can be
1463 // disabled based on prefer-vector-width and required-vector-width function
1464 // attributes.
1465 if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1466 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1467 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1468
1469 // Extends from v64i1 masks to 512-bit vectors.
1470 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1471 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1472 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1473
1474 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1475 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1476 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1477 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1478 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1479 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1480 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1481 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1482 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1483 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1484 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1485 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1486 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1487 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1488 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1489 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1490 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1491 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1492 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1493 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1494 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1495 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1496 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1497
1498 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1499
1500 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1501
1502 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1503 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1504 setOperationAction(ISD::VSELECT, VT, Custom);
1505 setOperationAction(ISD::ABS, VT, Legal);
1506 setOperationAction(ISD::SRL, VT, Custom);
1507 setOperationAction(ISD::SHL, VT, Custom);
1508 setOperationAction(ISD::SRA, VT, Custom);
1509 setOperationAction(ISD::MLOAD, VT, Legal);
1510 setOperationAction(ISD::MSTORE, VT, Legal);
1511 setOperationAction(ISD::CTPOP, VT, Custom);
1512 setOperationAction(ISD::CTTZ, VT, Custom);
1513 setOperationAction(ISD::CTLZ, VT, Custom);
1514 setOperationAction(ISD::SMAX, VT, Legal);
1515 setOperationAction(ISD::UMAX, VT, Legal);
1516 setOperationAction(ISD::SMIN, VT, Legal);
1517 setOperationAction(ISD::UMIN, VT, Legal);
1518
1519 setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
1520 setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
1521 setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
1522 }
1523
1524 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1525 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1526 }
1527
1528 if (Subtarget.hasBITALG()) {
1529 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1530 setOperationAction(ISD::CTPOP, VT, Legal);
1531 }
1532 }
1533
1534 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1535 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1536 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1537 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1538 }
1539
1540 // These operations are handled on non-VLX by artificially widening in
1541 // isel patterns.
1542 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1543
1544 if (Subtarget.hasBITALG()) {
1545 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1546 setOperationAction(ISD::CTPOP, VT, Legal);
1547 }
1548 }
1549
1550 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1551 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1552 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1553 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1554 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1555 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1556
1557 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1558 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1559 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1560 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1561 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1562
1563 if (Subtarget.hasDQI()) {
1564 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1565 // v2f32 UINT_TO_FP is already custom under SSE2.
1566 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1567 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 1568, __extension__ __PRETTY_FUNCTION__))
1568 "Unexpected operation action!")(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 1568, __extension__ __PRETTY_FUNCTION__))
;
1569 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1570 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1571 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1572 }
1573
1574 if (Subtarget.hasBWI()) {
1575 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1576 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1577 }
1578 }
1579
1580 // We want to custom lower some of our intrinsics.
1581 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1582 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1583 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1584 if (!Subtarget.is64Bit()) {
1585 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1586 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1587 }
1588
1589 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1590 // handle type legalization for these operations here.
1591 //
1592 // FIXME: We really should do custom legalization for addition and
1593 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1594 // than generic legalization for 64-bit multiplication-with-overflow, though.
1595 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1596 if (VT == MVT::i64 && !Subtarget.is64Bit())
1597 continue;
1598 // Add/Sub/Mul with overflow operations are custom lowered.
1599 setOperationAction(ISD::SADDO, VT, Custom);
1600 setOperationAction(ISD::UADDO, VT, Custom);
1601 setOperationAction(ISD::SSUBO, VT, Custom);
1602 setOperationAction(ISD::USUBO, VT, Custom);
1603 setOperationAction(ISD::SMULO, VT, Custom);
1604 setOperationAction(ISD::UMULO, VT, Custom);
1605
1606 // Support carry in as value rather than glue.
1607 setOperationAction(ISD::ADDCARRY, VT, Custom);
1608 setOperationAction(ISD::SUBCARRY, VT, Custom);
1609 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1610 }
1611
1612 if (!Subtarget.is64Bit()) {
1613 // These libcalls are not available in 32-bit.
1614 setLibcallName(RTLIB::SHL_I128, nullptr);
1615 setLibcallName(RTLIB::SRL_I128, nullptr);
1616 setLibcallName(RTLIB::SRA_I128, nullptr);
1617 setLibcallName(RTLIB::MUL_I128, nullptr);
1618 }
1619
1620 // Combine sin / cos into _sincos_stret if it is available.
1621 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1622 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1623 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1624 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1625 }
1626
1627 if (Subtarget.isTargetWin64()) {
1628 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1629 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1630 setOperationAction(ISD::SREM, MVT::i128, Custom);
1631 setOperationAction(ISD::UREM, MVT::i128, Custom);
1632 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1633 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1634 }
1635
1636 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1637 // is. We should promote the value to 64-bits to solve this.
1638 // This is what the CRT headers do - `fmodf` is an inline header
1639 // function casting to f64 and calling `fmod`.
1640 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1641 Subtarget.isTargetWindowsItanium()))
1642 for (ISD::NodeType Op :
1643 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1644 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1645 if (isOperationExpand(Op, MVT::f32))
1646 setOperationAction(Op, MVT::f32, Promote);
1647
1648 // We have target-specific dag combine patterns for the following nodes:
1649 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1650 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1651 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1652 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1653 setTargetDAGCombine(ISD::BITCAST);
1654 setTargetDAGCombine(ISD::VSELECT);
1655 setTargetDAGCombine(ISD::SELECT);
1656 setTargetDAGCombine(ISD::SHL);
1657 setTargetDAGCombine(ISD::SRA);
1658 setTargetDAGCombine(ISD::SRL);
1659 setTargetDAGCombine(ISD::OR);
1660 setTargetDAGCombine(ISD::AND);
1661 setTargetDAGCombine(ISD::ADD);
1662 setTargetDAGCombine(ISD::FADD);
1663 setTargetDAGCombine(ISD::FSUB);
1664 setTargetDAGCombine(ISD::FNEG);
1665 setTargetDAGCombine(ISD::FMA);
1666 setTargetDAGCombine(ISD::FMINNUM);
1667 setTargetDAGCombine(ISD::FMAXNUM);
1668 setTargetDAGCombine(ISD::SUB);
1669 setTargetDAGCombine(ISD::LOAD);
1670 setTargetDAGCombine(ISD::MLOAD);
1671 setTargetDAGCombine(ISD::STORE);
1672 setTargetDAGCombine(ISD::MSTORE);
1673 setTargetDAGCombine(ISD::TRUNCATE);
1674 setTargetDAGCombine(ISD::ZERO_EXTEND);
1675 setTargetDAGCombine(ISD::ANY_EXTEND);
1676 setTargetDAGCombine(ISD::SIGN_EXTEND);
1677 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1678 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1679 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1680 setTargetDAGCombine(ISD::SINT_TO_FP);
1681 setTargetDAGCombine(ISD::UINT_TO_FP);
1682 setTargetDAGCombine(ISD::SETCC);
1683 setTargetDAGCombine(ISD::MUL);
1684 setTargetDAGCombine(ISD::XOR);
1685 setTargetDAGCombine(ISD::MSCATTER);
1686 setTargetDAGCombine(ISD::MGATHER);
1687
1688 computeRegisterProperties(Subtarget.getRegisterInfo());
1689
1690 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1691 MaxStoresPerMemsetOptSize = 8;
1692 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1693 MaxStoresPerMemcpyOptSize = 4;
1694 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1695 MaxStoresPerMemmoveOptSize = 4;
1696
1697 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1698 // that needs to be benchmarked and balanced with the potential use of vector
1699 // load/store types (PR33329, PR33914).
1700 MaxLoadsPerMemcmp = 2;
1701 MaxLoadsPerMemcmpOptSize = 2;
1702
1703 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1704 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1705
1706 // An out-of-order CPU can speculatively execute past a predictable branch,
1707 // but a conditional move could be stalled by an expensive earlier operation.
1708 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1709 EnableExtLdPromotion = true;
1710 setPrefFunctionAlignment(4); // 2^4 bytes.
1711
1712 verifyIntrinsicTables();
1713}
1714
1715// This has so far only been implemented for 64-bit MachO.
1716bool X86TargetLowering::useLoadStackGuardNode() const {
1717 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1718}
1719
1720bool X86TargetLowering::useStackGuardXorFP() const {
1721 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
1722 return Subtarget.getTargetTriple().isOSMSVCRT();
1723}
1724
1725SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1726 const SDLoc &DL) const {
1727 EVT PtrTy = getPointerTy(DAG.getDataLayout());
1728 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
1729 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
1730 return SDValue(Node, 0);
1731}
1732
1733TargetLoweringBase::LegalizeTypeAction
1734X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1735 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1736 return TypeSplitVector;
1737
1738 if (ExperimentalVectorWideningLegalization &&
1739 VT.getVectorNumElements() != 1 &&
1740 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1741 return TypeWidenVector;
1742
1743 return TargetLoweringBase::getPreferredVectorAction(VT);
1744}
1745
1746MVT X86TargetLowering::getRegisterTypeForCallingConv(MVT VT) const {
1747 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1748 return MVT::v32i8;
1749 return TargetLowering::getRegisterTypeForCallingConv(VT);
1750}
1751
1752MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1753 EVT VT) const {
1754 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1755 return MVT::v32i8;
1756 return TargetLowering::getRegisterTypeForCallingConv(Context, VT);
1757}
1758
1759unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1760 EVT VT) const {
1761 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1762 return 1;
1763 return TargetLowering::getNumRegistersForCallingConv(Context, VT);
1764}
1765
1766EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1767 LLVMContext& Context,
1768 EVT VT) const {
1769 if (!VT.isVector())
1770 return MVT::i8;
1771
1772 if (Subtarget.hasAVX512()) {
1773 const unsigned NumElts = VT.getVectorNumElements();
1774
1775 // Figure out what this type will be legalized to.
1776 EVT LegalVT = VT;
1777 while (getTypeAction(Context, LegalVT) != TypeLegal)
1778 LegalVT = getTypeToTransformTo(Context, LegalVT);
1779
1780 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
1781 if (LegalVT.getSimpleVT().is512BitVector())
1782 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1783
1784 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
1785 // If we legalized to less than a 512-bit vector, then we will use a vXi1
1786 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
1787 // vXi16/vXi8.
1788 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
1789 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
1790 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1791 }
1792 }
1793
1794 return VT.changeVectorElementTypeToInteger();
1795}
1796
1797/// Helper for getByValTypeAlignment to determine
1798/// the desired ByVal argument alignment.
1799static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1800 if (MaxAlign == 16)
1801 return;
1802 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1803 if (VTy->getBitWidth() == 128)
1804 MaxAlign = 16;
1805 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1806 unsigned EltAlign = 0;
1807 getMaxByValAlign(ATy->getElementType(), EltAlign);
1808 if (EltAlign > MaxAlign)
1809 MaxAlign = EltAlign;
1810 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1811 for (auto *EltTy : STy->elements()) {
1812 unsigned EltAlign = 0;
1813 getMaxByValAlign(EltTy, EltAlign);
1814 if (EltAlign > MaxAlign)
1815 MaxAlign = EltAlign;
1816 if (MaxAlign == 16)
1817 break;
1818 }
1819 }
1820}
1821
1822/// Return the desired alignment for ByVal aggregate
1823/// function arguments in the caller parameter area. For X86, aggregates
1824/// that contain SSE vectors are placed at 16-byte boundaries while the rest
1825/// are at 4-byte boundaries.
1826unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1827 const DataLayout &DL) const {
1828 if (Subtarget.is64Bit()) {
1829 // Max of 8 and alignment of type.
1830 unsigned TyAlign = DL.getABITypeAlignment(Ty);
1831 if (TyAlign > 8)
1832 return TyAlign;
1833 return 8;
1834 }
1835
1836 unsigned Align = 4;
1837 if (Subtarget.hasSSE1())
1838 getMaxByValAlign(Ty, Align);
1839 return Align;
1840}
1841
1842/// Returns the target specific optimal type for load
1843/// and store operations as a result of memset, memcpy, and memmove
1844/// lowering. If DstAlign is zero that means it's safe to destination
1845/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1846/// means there isn't a need to check it against alignment requirement,
1847/// probably because the source does not need to be loaded. If 'IsMemset' is
1848/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1849/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1850/// source is constant so it does not need to be loaded.
1851/// It returns EVT::Other if the type should be determined using generic
1852/// target-independent logic.
1853EVT
1854X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1855 unsigned DstAlign, unsigned SrcAlign,
1856 bool IsMemset, bool ZeroMemset,
1857 bool MemcpyStrSrc,
1858 MachineFunction &MF) const {
1859 const Function &F = MF.getFunction();
1860 if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) {
1861 if (Size >= 16 &&
1862 (!Subtarget.isUnalignedMem16Slow() ||
1863 ((DstAlign == 0 || DstAlign >= 16) &&
1864 (SrcAlign == 0 || SrcAlign >= 16)))) {
1865 // FIXME: Check if unaligned 32-byte accesses are slow.
1866 if (Size >= 32 && Subtarget.hasAVX()) {
1867 // Although this isn't a well-supported type for AVX1, we'll let
1868 // legalization and shuffle lowering produce the optimal codegen. If we
1869 // choose an optimal type with a vector element larger than a byte,
1870 // getMemsetStores() may create an intermediate splat (using an integer
1871 // multiply) before we splat as a vector.
1872 return MVT::v32i8;
1873 }
1874 if (Subtarget.hasSSE2())
1875 return MVT::v16i8;
1876 // TODO: Can SSE1 handle a byte vector?
1877 if (Subtarget.hasSSE1())
1878 return MVT::v4f32;
1879 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
1880 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
1881 // Do not use f64 to lower memcpy if source is string constant. It's
1882 // better to use i32 to avoid the loads.
1883 // Also, do not use f64 to lower memset unless this is a memset of zeros.
1884 // The gymnastics of splatting a byte value into an XMM register and then
1885 // only using 8-byte stores (because this is a CPU with slow unaligned
1886 // 16-byte accesses) makes that a loser.
1887 return MVT::f64;
1888 }
1889 }
1890 // This is a compromise. If we reach here, unaligned accesses may be slow on
1891 // this target. However, creating smaller, aligned accesses could be even
1892 // slower and would certainly be a lot more code.
1893 if (Subtarget.is64Bit() && Size >= 8)
1894 return MVT::i64;
1895 return MVT::i32;
1896}
1897
1898bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1899 if (VT == MVT::f32)
1900 return X86ScalarSSEf32;
1901 else if (VT == MVT::f64)
1902 return X86ScalarSSEf64;
1903 return true;
1904}
1905
1906bool
1907X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1908 unsigned,
1909 unsigned,
1910 bool *Fast) const {
1911 if (Fast) {
1912 switch (VT.getSizeInBits()) {
1913 default:
1914 // 8-byte and under are always assumed to be fast.
1915 *Fast = true;
1916 break;
1917 case 128:
1918 *Fast = !Subtarget.isUnalignedMem16Slow();
1919 break;
1920 case 256:
1921 *Fast = !Subtarget.isUnalignedMem32Slow();
1922 break;
1923 // TODO: What about AVX-512 (512-bit) accesses?
1924 }
1925 }
1926 // Misaligned accesses of any size are always allowed.
1927 return true;
1928}
1929
1930/// Return the entry encoding for a jump table in the
1931/// current function. The returned value is a member of the
1932/// MachineJumpTableInfo::JTEntryKind enum.
1933unsigned X86TargetLowering::getJumpTableEncoding() const {
1934 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1935 // symbol.
1936 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
1937 return MachineJumpTableInfo::EK_Custom32;
1938
1939 // Otherwise, use the normal jump table encoding heuristics.
1940 return TargetLowering::getJumpTableEncoding();
1941}
1942
1943bool X86TargetLowering::useSoftFloat() const {
1944 return Subtarget.useSoftFloat();
1945}
1946
1947void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
1948 ArgListTy &Args) const {
1949
1950 // Only relabel X86-32 for C / Stdcall CCs.
1951 if (Subtarget.is64Bit())
1952 return;
1953 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
1954 return;
1955 unsigned ParamRegs = 0;
1956 if (auto *M = MF->getFunction().getParent())
1957 ParamRegs = M->getNumberRegisterParameters();
1958
1959 // Mark the first N int arguments as having reg
1960 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
1961 Type *T = Args[Idx].Ty;
1962 if (T->isPointerTy() || T->isIntegerTy())
1963 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
1964 unsigned numRegs = 1;
1965 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
1966 numRegs = 2;
1967 if (ParamRegs < numRegs)
1968 return;
1969 ParamRegs -= numRegs;
1970 Args[Idx].IsInReg = true;
1971 }
1972 }
1973}
1974
1975const MCExpr *
1976X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1977 const MachineBasicBlock *MBB,
1978 unsigned uid,MCContext &Ctx) const{
1979 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())(static_cast <bool> (isPositionIndependent() &&
Subtarget.isPICStyleGOT()) ? void (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 1979, __extension__ __PRETTY_FUNCTION__))
;
1980 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
1981 // entries.
1982 return MCSymbolRefExpr::create(MBB->getSymbol(),
1983 MCSymbolRefExpr::VK_GOTOFF, Ctx);
1984}
1985
1986/// Returns relocation base for the given PIC jumptable.
1987SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
1988 SelectionDAG &DAG) const {
1989 if (!Subtarget.is64Bit())
1990 // This doesn't have SDLoc associated with it, but is not really the
1991 // same as a Register.
1992 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
1993 getPointerTy(DAG.getDataLayout()));
1994 return Table;
1995}
1996
1997/// This returns the relocation base for the given PIC jumptable,
1998/// the same as getPICJumpTableRelocBase, but as an MCExpr.
1999const MCExpr *X86TargetLowering::
2000getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2001 MCContext &Ctx) const {
2002 // X86-64 uses RIP relative addressing based on the jump table label.
2003 if (Subtarget.isPICStyleRIPRel())
2004 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2005
2006 // Otherwise, the reference is relative to the PIC base.
2007 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2008}
2009
2010std::pair<const TargetRegisterClass *, uint8_t>
2011X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2012 MVT VT) const {
2013 const TargetRegisterClass *RRC = nullptr;
2014 uint8_t Cost = 1;
2015 switch (VT.SimpleTy) {
2016 default:
2017 return TargetLowering::findRepresentativeClass(TRI, VT);
2018 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2019 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2020 break;
2021 case MVT::x86mmx:
2022 RRC = &X86::VR64RegClass;
2023 break;
2024 case MVT::f32: case MVT::f64:
2025 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2026 case MVT::v4f32: case MVT::v2f64:
2027 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2028 case MVT::v8f32: case MVT::v4f64:
2029 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2030 case MVT::v16f32: case MVT::v8f64:
2031 RRC = &X86::VR128XRegClass;
2032 break;
2033 }
2034 return std::make_pair(RRC, Cost);
2035}
2036
2037unsigned X86TargetLowering::getAddressSpace() const {
2038 if (Subtarget.is64Bit())
2039 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2040 return 256;
2041}
2042
2043static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2044 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2045 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2046}
2047
2048static Constant* SegmentOffset(IRBuilder<> &IRB,
2049 unsigned Offset, unsigned AddressSpace) {
2050 return ConstantExpr::getIntToPtr(
2051 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2052 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2053}
2054
2055Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2056 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2057 // tcbhead_t; use it instead of the usual global variable (see
2058 // sysdeps/{i386,x86_64}/nptl/tls.h)
2059 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2060 if (Subtarget.isTargetFuchsia()) {
2061 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2062 return SegmentOffset(IRB, 0x10, getAddressSpace());
2063 } else {
2064 // %fs:0x28, unless we're using a Kernel code model, in which case
2065 // it's %gs:0x28. gs:0x14 on i386.
2066 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2067 return SegmentOffset(IRB, Offset, getAddressSpace());
2068 }
2069 }
2070
2071 return TargetLowering::getIRStackGuard(IRB);
2072}
2073
2074void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2075 // MSVC CRT provides functionalities for stack protection.
2076 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
2077 // MSVC CRT has a global variable holding security cookie.
2078 M.getOrInsertGlobal("__security_cookie",
2079 Type::getInt8PtrTy(M.getContext()));
2080
2081 // MSVC CRT has a function to validate security cookie.
2082 auto *SecurityCheckCookie = cast<Function>(
2083 M.getOrInsertFunction("__security_check_cookie",
2084 Type::getVoidTy(M.getContext()),
2085 Type::getInt8PtrTy(M.getContext())));
2086 SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
2087 SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
2088 return;
2089 }
2090 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2091 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2092 return;
2093 TargetLowering::insertSSPDeclarations(M);
2094}
2095
2096Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2097 // MSVC CRT has a global variable holding security cookie.
2098 if (Subtarget.getTargetTriple().isOSMSVCRT())
2099 return M.getGlobalVariable("__security_cookie");
2100 return TargetLowering::getSDagStackGuard(M);
2101}
2102
2103Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2104 // MSVC CRT has a function to validate security cookie.
2105 if (Subtarget.getTargetTriple().isOSMSVCRT())
2106 return M.getFunction("__security_check_cookie");
2107 return TargetLowering::getSSPStackGuardCheck(M);
2108}
2109
2110Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2111 if (Subtarget.getTargetTriple().isOSContiki())
2112 return getDefaultSafeStackPointerLocation(IRB, false);
2113
2114 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2115 // definition of TLS_SLOT_SAFESTACK in
2116 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2117 if (Subtarget.isTargetAndroid()) {
2118 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2119 // %gs:0x24 on i386
2120 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2121 return SegmentOffset(IRB, Offset, getAddressSpace());
2122 }
2123
2124 // Fuchsia is similar.
2125 if (Subtarget.isTargetFuchsia()) {
2126 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2127 return SegmentOffset(IRB, 0x18, getAddressSpace());
2128 }
2129
2130 return TargetLowering::getSafeStackPointerLocation(IRB);
2131}
2132
2133bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2134 unsigned DestAS) const {
2135 assert(SrcAS != DestAS && "Expected different address spaces!")(static_cast <bool> (SrcAS != DestAS && "Expected different address spaces!"
) ? void (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2135, __extension__ __PRETTY_FUNCTION__))
;
2136
2137 return SrcAS < 256 && DestAS < 256;
2138}
2139
2140//===----------------------------------------------------------------------===//
2141// Return Value Calling Convention Implementation
2142//===----------------------------------------------------------------------===//
2143
2144#include "X86GenCallingConv.inc"
2145
2146bool X86TargetLowering::CanLowerReturn(
2147 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2148 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2149 SmallVector<CCValAssign, 16> RVLocs;
2150 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2151 return CCInfo.CheckReturn(Outs, RetCC_X86);
2152}
2153
2154const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2155 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2156 return ScratchRegs;
2157}
2158
2159/// Lowers masks values (v*i1) to the local register values
2160/// \returns DAG node after lowering to register type
2161static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2162 const SDLoc &Dl, SelectionDAG &DAG) {
2163 EVT ValVT = ValArg.getValueType();
2164
2165 if (ValVT == MVT::v1i1)
2166 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2167 DAG.getIntPtrConstant(0, Dl));
2168
2169 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2170 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2171 // Two stage lowering might be required
2172 // bitcast: v8i1 -> i8 / v16i1 -> i16
2173 // anyextend: i8 -> i32 / i16 -> i32
2174 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2175 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2176 if (ValLoc == MVT::i32)
2177 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2178 return ValToCopy;
2179 }
2180
2181 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2182 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2183 // One stage lowering is required
2184 // bitcast: v32i1 -> i32 / v64i1 -> i64
2185 return DAG.getBitcast(ValLoc, ValArg);
2186 }
2187
2188 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2189}
2190
2191/// Breaks v64i1 value into two registers and adds the new node to the DAG
2192static void Passv64i1ArgInRegs(
2193 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2194 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2195 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2196 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")(static_cast <bool> (Subtarget.hasBWI() && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2196, __extension__ __PRETTY_FUNCTION__))
;
2197 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2197, __extension__ __PRETTY_FUNCTION__))
;
2198 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")(static_cast <bool> (Arg.getValueType() == MVT::i64 &&
"Expecting 64 bit value") ? void (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2198, __extension__ __PRETTY_FUNCTION__))
;
2199 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2200, __extension__ __PRETTY_FUNCTION__))
2200 "The value should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2200, __extension__ __PRETTY_FUNCTION__))
;
2201
2202 // Before splitting the value we cast it to i64
2203 Arg = DAG.getBitcast(MVT::i64, Arg);
2204
2205 // Splitting the value into two i32 types
2206 SDValue Lo, Hi;
2207 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2208 DAG.getConstant(0, Dl, MVT::i32));
2209 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2210 DAG.getConstant(1, Dl, MVT::i32));
2211
2212 // Attach the two i32 types into corresponding registers
2213 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2214 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2215}
2216
2217SDValue
2218X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2219 bool isVarArg,
2220 const SmallVectorImpl<ISD::OutputArg> &Outs,
2221 const SmallVectorImpl<SDValue> &OutVals,
2222 const SDLoc &dl, SelectionDAG &DAG) const {
2223 MachineFunction &MF = DAG.getMachineFunction();
2224 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2225
2226 // In some cases we need to disable registers from the default CSR list.
2227 // For example, when they are used for argument passing.
2228 bool ShouldDisableCalleeSavedRegister =
2229 CallConv == CallingConv::X86_RegCall ||
2230 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2231
2232 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2233 report_fatal_error("X86 interrupts may not return any value");
2234
2235 SmallVector<CCValAssign, 16> RVLocs;
2236 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2237 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2238
2239 SDValue Flag;
2240 SmallVector<SDValue, 6> RetOps;
2241 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2242 // Operand #1 = Bytes To Pop
2243 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2244 MVT::i32));
2245
2246 // Copy the result values into the output registers.
2247 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2248 ++I, ++OutsIndex) {
2249 CCValAssign &VA = RVLocs[I];
2250 assert(VA.isRegLoc() && "Can only return in registers!")(static_cast <bool> (VA.isRegLoc() && "Can only return in registers!"
) ? void (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2250, __extension__ __PRETTY_FUNCTION__))
;
2251
2252 // Add the register to the CalleeSaveDisableRegs list.
2253 if (ShouldDisableCalleeSavedRegister)
2254 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2255
2256 SDValue ValToCopy = OutVals[OutsIndex];
2257 EVT ValVT = ValToCopy.getValueType();
2258
2259 // Promote values to the appropriate types.
2260 if (VA.getLocInfo() == CCValAssign::SExt)
2261 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2262 else if (VA.getLocInfo() == CCValAssign::ZExt)
2263 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2264 else if (VA.getLocInfo() == CCValAssign::AExt) {
2265 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2266 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2267 else
2268 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2269 }
2270 else if (VA.getLocInfo() == CCValAssign::BCvt)
2271 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2272
2273 assert(VA.getLocInfo() != CCValAssign::FPExt &&(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2274, __extension__ __PRETTY_FUNCTION__))
2274 "Unexpected FP-extend for return value.")(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2274, __extension__ __PRETTY_FUNCTION__))
;
2275
2276 // If this is x86-64, and we disabled SSE, we can't return FP values,
2277 // or SSE or MMX vectors.
2278 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2279 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2280 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2281 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2282 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2283 } else if (ValVT == MVT::f64 &&
2284 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2285 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2286 // llvm-gcc has never done it right and no one has noticed, so this
2287 // should be OK for now.
2288 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2289 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2290 }
2291
2292 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2293 // the RET instruction and handled by the FP Stackifier.
2294 if (VA.getLocReg() == X86::FP0 ||
2295 VA.getLocReg() == X86::FP1) {
2296 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2297 // change the value to the FP stack register class.
2298 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2299 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2300 RetOps.push_back(ValToCopy);
2301 // Don't emit a copytoreg.
2302 continue;
2303 }
2304
2305 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2306 // which is returned in RAX / RDX.
2307 if (Subtarget.is64Bit()) {
2308 if (ValVT == MVT::x86mmx) {
2309 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2310 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2311 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2312 ValToCopy);
2313 // If we don't have SSE2 available, convert to v4f32 so the generated
2314 // register is legal.
2315 if (!Subtarget.hasSSE2())
2316 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2317 }
2318 }
2319 }
2320
2321 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2322
2323 if (VA.needsCustom()) {
2324 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2325, __extension__ __PRETTY_FUNCTION__))
2325 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2325, __extension__ __PRETTY_FUNCTION__))
;
2326
2327 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2328 Subtarget);
2329
2330 assert(2 == RegsToPass.size() &&(static_cast <bool> (2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? void (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2331, __extension__ __PRETTY_FUNCTION__))
2331 "Expecting two registers after Pass64BitArgInRegs")(static_cast <bool> (2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? void (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2331, __extension__ __PRETTY_FUNCTION__))
;
2332
2333 // Add the second register to the CalleeSaveDisableRegs list.
2334 if (ShouldDisableCalleeSavedRegister)
2335 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2336 } else {
2337 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2338 }
2339
2340 // Add nodes to the DAG and add the values into the RetOps list
2341 for (auto &Reg : RegsToPass) {
2342 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2343 Flag = Chain.getValue(1);
2344 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2345 }
2346 }
2347
2348 // Swift calling convention does not require we copy the sret argument
2349 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2350
2351 // All x86 ABIs require that for returning structs by value we copy
2352 // the sret argument into %rax/%eax (depending on ABI) for the return.
2353 // We saved the argument into a virtual register in the entry block,
2354 // so now we copy the value out and into %rax/%eax.
2355 //
2356 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2357 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2358 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2359 // either case FuncInfo->setSRetReturnReg() will have been called.
2360 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2361 // When we have both sret and another return value, we should use the
2362 // original Chain stored in RetOps[0], instead of the current Chain updated
2363 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2364
2365 // For the case of sret and another return value, we have
2366 // Chain_0 at the function entry
2367 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2368 // If we use Chain_1 in getCopyFromReg, we will have
2369 // Val = getCopyFromReg(Chain_1)
2370 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2371
2372 // getCopyToReg(Chain_0) will be glued together with
2373 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2374 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2375 // Data dependency from Unit B to Unit A due to usage of Val in
2376 // getCopyToReg(Chain_1, Val)
2377 // Chain dependency from Unit A to Unit B
2378
2379 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2380 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2381 getPointerTy(MF.getDataLayout()));
2382
2383 unsigned RetValReg
2384 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2385 X86::RAX : X86::EAX;
2386 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2387 Flag = Chain.getValue(1);
2388
2389 // RAX/EAX now acts like a return value.
2390 RetOps.push_back(
2391 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2392
2393 // Add the returned register to the CalleeSaveDisableRegs list.
2394 if (ShouldDisableCalleeSavedRegister)
2395 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2396 }
2397
2398 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2399 const MCPhysReg *I =
2400 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2401 if (I) {
2402 for (; *I; ++I) {
2403 if (X86::GR64RegClass.contains(*I))
2404 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2405 else
2406 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2406)
;
2407 }
2408 }
2409
2410 RetOps[0] = Chain; // Update chain.
2411
2412 // Add the flag if we have it.
2413 if (Flag.getNode())
2414 RetOps.push_back(Flag);
2415
2416 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2417 if (CallConv == CallingConv::X86_INTR)
2418 opcode = X86ISD::IRET;
2419 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2420}
2421
2422bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2423 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2424 return false;
2425
2426 SDValue TCChain = Chain;
2427 SDNode *Copy = *N->use_begin();
2428 if (Copy->getOpcode() == ISD::CopyToReg) {
2429 // If the copy has a glue operand, we conservatively assume it isn't safe to
2430 // perform a tail call.
2431 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2432 return false;
2433 TCChain = Copy->getOperand(0);
2434 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2435 return false;
2436
2437 bool HasRet = false;
2438 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2439 UI != UE; ++UI) {
2440 if (UI->getOpcode() != X86ISD::RET_FLAG)
2441 return false;
2442 // If we are returning more than one value, we can definitely
2443 // not make a tail call see PR19530
2444 if (UI->getNumOperands() > 4)
2445 return false;
2446 if (UI->getNumOperands() == 4 &&
2447 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2448 return false;
2449 HasRet = true;
2450 }
2451
2452 if (!HasRet)
2453 return false;
2454
2455 Chain = TCChain;
2456 return true;
2457}
2458
2459EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2460 ISD::NodeType ExtendKind) const {
2461 MVT ReturnMVT = MVT::i32;
2462
2463 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2464 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2465 // The ABI does not require i1, i8 or i16 to be extended.
2466 //
2467 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2468 // always extending i8/i16 return values, so keep doing that for now.
2469 // (PR26665).
2470 ReturnMVT = MVT::i8;
2471 }
2472
2473 EVT MinVT = getRegisterType(Context, ReturnMVT);
2474 return VT.bitsLT(MinVT) ? MinVT : VT;
2475}
2476
2477/// Reads two 32 bit registers and creates a 64 bit mask value.
2478/// \param VA The current 32 bit value that need to be assigned.
2479/// \param NextVA The next 32 bit value that need to be assigned.
2480/// \param Root The parent DAG node.
2481/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2482/// glue purposes. In the case the DAG is already using
2483/// physical register instead of virtual, we should glue
2484/// our new SDValue to InFlag SDvalue.
2485/// \return a new SDvalue of size 64bit.
2486static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2487 SDValue &Root, SelectionDAG &DAG,
2488 const SDLoc &Dl, const X86Subtarget &Subtarget,
2489 SDValue *InFlag = nullptr) {
2490 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(static_cast <bool> ((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2490, __extension__ __PRETTY_FUNCTION__))
;
2491 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2491, __extension__ __PRETTY_FUNCTION__))
;
2492 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2493, __extension__ __PRETTY_FUNCTION__))
2493 "Expecting first location of 64 bit width type")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2493, __extension__ __PRETTY_FUNCTION__))
;
2494 assert(NextVA.getValVT() == VA.getValVT() &&(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2495, __extension__ __PRETTY_FUNCTION__))
2495 "The locations should have the same type")(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2495, __extension__ __PRETTY_FUNCTION__))
;
2496 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2497, __extension__ __PRETTY_FUNCTION__))
2497 "The values should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2497, __extension__ __PRETTY_FUNCTION__))
;
2498
2499 SDValue Lo, Hi;
2500 unsigned Reg;
2501 SDValue ArgValueLo, ArgValueHi;
2502
2503 MachineFunction &MF = DAG.getMachineFunction();
2504 const TargetRegisterClass *RC = &X86::GR32RegClass;
2505
2506 // Read a 32 bit value from the registers
2507 if (nullptr == InFlag) {
2508 // When no physical register is present,
2509 // create an intermediate virtual register
2510 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2511 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2512 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2513 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2514 } else {
2515 // When a physical register is available read the value from it and glue
2516 // the reads together.
2517 ArgValueLo =
2518 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2519 *InFlag = ArgValueLo.getValue(2);
2520 ArgValueHi =
2521 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2522 *InFlag = ArgValueHi.getValue(2);
2523 }
2524
2525 // Convert the i32 type into v32i1 type
2526 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2527
2528 // Convert the i32 type into v32i1 type
2529 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2530
2531 // Concatenate the two values together
2532 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2533}
2534
2535/// The function will lower a register of various sizes (8/16/32/64)
2536/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2537/// \returns a DAG node contains the operand after lowering to mask type.
2538static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2539 const EVT &ValLoc, const SDLoc &Dl,
2540 SelectionDAG &DAG) {
2541 SDValue ValReturned = ValArg;
2542
2543 if (ValVT == MVT::v1i1)
2544 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2545
2546 if (ValVT == MVT::v64i1) {
2547 // In 32 bit machine, this case is handled by getv64i1Argument
2548 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")(static_cast <bool> (ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? void (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2548, __extension__ __PRETTY_FUNCTION__))
;
2549 // In 64 bit machine, There is no need to truncate the value only bitcast
2550 } else {
2551 MVT maskLen;
2552 switch (ValVT.getSimpleVT().SimpleTy) {
2553 case MVT::v8i1:
2554 maskLen = MVT::i8;
2555 break;
2556 case MVT::v16i1:
2557 maskLen = MVT::i16;
2558 break;
2559 case MVT::v32i1:
2560 maskLen = MVT::i32;
2561 break;
2562 default:
2563 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2563)
;
2564 }
2565
2566 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2567 }
2568 return DAG.getBitcast(ValVT, ValReturned);
2569}
2570
2571/// Lower the result values of a call into the
2572/// appropriate copies out of appropriate physical registers.
2573///
2574SDValue X86TargetLowering::LowerCallResult(
2575 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2576 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2577 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2578 uint32_t *RegMask) const {
2579
2580 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2581 // Assign locations to each value returned by this call.
2582 SmallVector<CCValAssign, 16> RVLocs;
2583 bool Is64Bit = Subtarget.is64Bit();
2584 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2585 *DAG.getContext());
2586 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2587
2588 // Copy all of the result registers out of their specified physreg.
2589 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2590 ++I, ++InsIndex) {
2591 CCValAssign &VA = RVLocs[I];
2592 EVT CopyVT = VA.getLocVT();
2593
2594 // In some calling conventions we need to remove the used registers
2595 // from the register mask.
2596 if (RegMask) {
2597 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2598 SubRegs.isValid(); ++SubRegs)
2599 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2600 }
2601
2602 // If this is x86-64, and we disabled SSE, we can't return FP values
2603 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2604 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2605 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2606 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2607 }
2608
2609 // If we prefer to use the value in xmm registers, copy it out as f80 and
2610 // use a truncate to move it from fp stack reg to xmm reg.
2611 bool RoundAfterCopy = false;
2612 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2613 isScalarFPTypeInSSEReg(VA.getValVT())) {
2614 if (!Subtarget.hasX87())
2615 report_fatal_error("X87 register return with X87 disabled");
2616 CopyVT = MVT::f80;
2617 RoundAfterCopy = (CopyVT != VA.getLocVT());
2618 }
2619
2620 SDValue Val;
2621 if (VA.needsCustom()) {
2622 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2623, __extension__ __PRETTY_FUNCTION__))
2623 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2623, __extension__ __PRETTY_FUNCTION__))
;
2624 Val =
2625 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2626 } else {
2627 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2628 .getValue(1);
2629 Val = Chain.getValue(0);
2630 InFlag = Chain.getValue(2);
2631 }
2632
2633 if (RoundAfterCopy)
2634 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2635 // This truncation won't change the value.
2636 DAG.getIntPtrConstant(1, dl));
2637
2638 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2639 if (VA.getValVT().isVector() &&
2640 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2641 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2642 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2643 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2644 } else
2645 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2646 }
2647
2648 InVals.push_back(Val);
2649 }
2650
2651 return Chain;
2652}
2653
2654//===----------------------------------------------------------------------===//
2655// C & StdCall & Fast Calling Convention implementation
2656//===----------------------------------------------------------------------===//
2657// StdCall calling convention seems to be standard for many Windows' API
2658// routines and around. It differs from C calling convention just a little:
2659// callee should clean up the stack, not caller. Symbols should be also
2660// decorated in some fancy way :) It doesn't support any vector arguments.
2661// For info on fast calling convention see Fast Calling Convention (tail call)
2662// implementation LowerX86_32FastCCCallTo.
2663
/// Classification of struct-return (sret) handling, as produced by
/// callIsStructReturn (for calls) and argsAreStructReturn (for functions).
enum StructReturnType {
  /// There are no arguments, or the first argument is not marked sret.
  NotStructReturn,
  /// The first argument is sret and is marked inreg, or the target is MCU.
  RegStructReturn,
  /// The first argument is sret and neither of the above conditions holds.
  StackStructReturn
};
2671static StructReturnType
2672callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
2673 if (Outs.empty())
2674 return NotStructReturn;
2675
2676 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2677 if (!Flags.isSRet())
2678 return NotStructReturn;
2679 if (Flags.isInReg() || IsMCU)
2680 return RegStructReturn;
2681 return StackStructReturn;
2682}
2683
2684/// Determines whether a function uses struct return semantics.
2685static StructReturnType
2686argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
2687 if (Ins.empty())
2688 return NotStructReturn;
2689
2690 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2691 if (!Flags.isSRet())
2692 return NotStructReturn;
2693 if (Flags.isInReg() || IsMCU)
2694 return RegStructReturn;
2695 return StackStructReturn;
2696}
2697
2698/// Make a copy of an aggregate at address specified by "Src" to address
2699/// "Dst" with size and alignment information specified by the specific
2700/// parameter attribute. The copy will be passed as a byval function parameter.
2701static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2702 SDValue Chain, ISD::ArgFlagsTy Flags,
2703 SelectionDAG &DAG, const SDLoc &dl) {
2704 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2705
2706 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2707 /*isVolatile*/false, /*AlwaysInline=*/true,
2708 /*isTailCall*/false,
2709 MachinePointerInfo(), MachinePointerInfo());
2710}
2711
2712/// Return true if the calling convention is one that we can guarantee TCO for.
2713static bool canGuaranteeTCO(CallingConv::ID CC) {
2714 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2715 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2716 CC == CallingConv::HHVM);
2717}
2718
2719/// Return true if we might ever do TCO for calls with this calling convention.
2720static bool mayTailCallThisCC(CallingConv::ID CC) {
2721 switch (CC) {
2722 // C calling conventions:
2723 case CallingConv::C:
2724 case CallingConv::Win64:
2725 case CallingConv::X86_64_SysV:
2726 // Callee pop conventions:
2727 case CallingConv::X86_ThisCall:
2728 case CallingConv::X86_StdCall:
2729 case CallingConv::X86_VectorCall:
2730 case CallingConv::X86_FastCall:
2731 return true;
2732 default:
2733 return canGuaranteeTCO(CC);
2734 }
2735}
2736
2737/// Return true if the function is being made into a tailcall target by
2738/// changing its ABI.
2739static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2740 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2741}
2742
2743bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2744 auto Attr =
2745 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2746 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2747 return false;
2748
2749 ImmutableCallSite CS(CI);
2750 CallingConv::ID CalleeCC = CS.getCallingConv();
2751 if (!mayTailCallThisCC(CalleeCC))
2752 return false;
2753
2754 return true;
2755}
2756
/// Lowers one incoming argument that was assigned a memory location.
///
/// Depending on the argument flags this either
///  - returns the frame index of a fixed stack object (byval arguments),
///  - returns a load from a (possibly shared) fixed stack object when the
///    argument is a copy elision candidate, or
///  - creates a fixed stack object for the value and returns a load from it,
///    followed by a TRUNCATE / SCALAR_TO_VECTOR when an i1-based value was
///    stored extended in its location.
///
/// \param Chain    Chain operand used for the loads created here.
/// \param CallConv Calling convention of the function being lowered.
/// \param Ins      Incoming argument descriptors.
/// \param dl       Debug location attached to the created nodes.
/// \param DAG      DAG the nodes are created in.
/// \param VA       Location assignment of the argument (memory location).
/// \param MFI      Frame info that receives the fixed stack objects.
/// \param i        Index into \p Ins of the argument being lowered.
/// \returns the frame index node or the loaded (and possibly converted) value.
2757SDValue
2758X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2759 const SmallVectorImpl<ISD::InputArg> &Ins,
2760 const SDLoc &dl, SelectionDAG &DAG,
2761 const CCValAssign &VA,
2762 MachineFrameInfo &MFI, unsigned i) const {
2763 // Create the nodes corresponding to a load from this parameter slot.
2764 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2765 bool AlwaysUseMutable = shouldGuaranteeTCO(
2766 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2767 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2768 EVT ValVT;
2769 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2770
2771 // If value is passed by pointer we have address passed instead of the value
2772 // itself. No need to extend if the mask value and location share the same
2773 // absolute size.
2774 bool ExtendedInMem =
2775 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2776 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2777
 // In both of these cases the slot is accessed with the location type; the
 // conversion back to the value type happens at the end of this function
 // (ExtendedInMem) or in the caller (Indirect).
2778 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2779 ValVT = VA.getLocVT();
2780 else
2781 ValVT = VA.getValVT();
2782
2783 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2784 // taken by a return address.
2785 int Offset = 0;
2786 if (CallConv == CallingConv::X86_INTR) {
2787 // X86 interrupts may take one or two arguments.
2788 // On the stack there will be no return address as in regular call.
2789 // Offset of last argument need to be set to -4/-8 bytes.
2790 // Where offset of the first argument out of two, should be set to 0 bytes.
2791 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2792 if (Subtarget.is64Bit() && Ins.size() == 2) {
2793 // The stack pointer needs to be realigned for 64 bit handlers with error
2794 // code, so the argument offset changes by 8 bytes.
2795 Offset += 8;
2796 }
2797 }
2798
2799 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2800 // changed with more analysis.
2801 // In case of tail call optimization mark all arguments mutable. Since they
2802 // could be overwritten by lowering of arguments in case of a tail call.
2803 if (Flags.isByVal()) {
2804 unsigned Bytes = Flags.getByValSize();
2805 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2806 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2807 // Adjust SP offset of interrupt parameter.
2808 if (CallConv == CallingConv::X86_INTR) {
2809 MFI.setObjectOffset(FI, Offset);
2810 }
 // byval arguments are returned as the address of the object, not a load.
2811 return DAG.getFrameIndex(FI, PtrVT);
2812 }
2813
2814 // This is an argument in memory. We might be able to perform copy elision.
2815 if (Flags.isCopyElisionCandidate()) {
2816 EVT ArgVT = Ins[i].ArgVT;
2817 SDValue PartAddr;
2818 if (Ins[i].PartOffset == 0) {
2819 // If this is a one-part value or the first part of a multi-part value,
2820 // create a stack object for the entire argument value type and return a
2821 // load from our portion of it. This assumes that if the first part of an
2822 // argument is in memory, the rest will also be in memory.
2823 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
2824 /*Immutable=*/false);
2825 PartAddr = DAG.getFrameIndex(FI, PtrVT);
2826 return DAG.getLoad(
2827 ValVT, dl, Chain, PartAddr,
2828 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2829 } else {
2830 // This is not the first piece of an argument in memory. See if there is
2831 // already a fixed stack object including this offset. If so, assume it
2832 // was created by the PartOffset == 0 branch above and create a load from
2833 // the appropriate offset into it.
2834 int64_t PartBegin = VA.getLocMemOffset();
2835 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
2836 int FI = MFI.getObjectIndexBegin();
2837 for (; MFI.isFixedObjectIndex(FI); ++FI) {
2838 int64_t ObjBegin = MFI.getObjectOffset(FI);
2839 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
2840 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
2841 break;
2842 }
 // FI is still a fixed object index only if the search loop above stopped
 // at an object that fully contains [PartBegin, PartEnd).
2843 if (MFI.isFixedObjectIndex(FI)) {
2844 SDValue Addr =
2845 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
2846 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
2847 return DAG.getLoad(
2848 ValVT, dl, Chain, Addr,
2849 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
2850 Ins[i].PartOffset));
2851 }
 // No enclosing object was found: fall through to the generic path below.
2852 }
2853 }
2854
 // Generic path: give the value its own fixed stack object and load from it.
2855 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2856 VA.getLocMemOffset(), isImmutable);
2857
2858 // Set SExt or ZExt flag.
2859 if (VA.getLocInfo() == CCValAssign::ZExt) {
2860 MFI.setObjectZExt(FI, true);
2861 } else if (VA.getLocInfo() == CCValAssign::SExt) {
2862 MFI.setObjectSExt(FI, true);
2863 }
2864
2865 // Adjust SP offset of interrupt parameter.
2866 if (CallConv == CallingConv::X86_INTR) {
2867 MFI.setObjectOffset(FI, Offset);
2868 }
2869
2870 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2871 SDValue Val = DAG.getLoad(
2872 ValVT, dl, Chain, FIN,
2873 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
 // A value that was stored extended is converted back to its value type:
 // vectors through SCALAR_TO_VECTOR, scalars through TRUNCATE.
2874 return ExtendedInMem
2875 ? (VA.getValVT().isVector()
2876 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
2877 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
2878 : Val;
2879}
2880
2881// FIXME: Get this from tablegen.
2882static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2883 const X86Subtarget &Subtarget) {
2884 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2884, __extension__ __PRETTY_FUNCTION__))
;
2885
2886 if (Subtarget.isCallingConvWin64(CallConv)) {
2887 static const MCPhysReg GPR64ArgRegsWin64[] = {
2888 X86::RCX, X86::RDX, X86::R8, X86::R9
2889 };
2890 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2891 }
2892
2893 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2894 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2895 };
2896 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2897}
2898
2899// FIXME: Get this from tablegen.
2900static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2901 CallingConv::ID CallConv,
2902 const X86Subtarget &Subtarget) {
2903 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2903, __extension__ __PRETTY_FUNCTION__))
;
2904 if (Subtarget.isCallingConvWin64(CallConv)) {
2905 // The XMM registers which might contain var arg parameters are shadowed
2906 // in their paired GPR. So we only need to save the GPR to their home
2907 // slots.
2908 // TODO: __vectorcall will change this.
2909 return None;
2910 }
2911
2912 const Function &F = MF.getFunction();
2913 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
2914 bool isSoftFloat = Subtarget.useSoftFloat();
2915 assert(!(isSoftFloat && NoImplicitFloatOps) &&(static_cast <bool> (!(isSoftFloat && NoImplicitFloatOps
) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2916, __extension__ __PRETTY_FUNCTION__))
2916 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(isSoftFloat && NoImplicitFloatOps
) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2916, __extension__ __PRETTY_FUNCTION__))
;
2917 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
2918 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2919 // registers.
2920 return None;
2921
2922 static const MCPhysReg XMMArgRegs64Bit[] = {
2923 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2924 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2925 };
2926 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2927}
2928
2929#ifndef NDEBUG
2930static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
2931 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
2932 [](const CCValAssign &A, const CCValAssign &B) -> bool {
2933 return A.getValNo() < B.getValNo();
2934 });
2935}
2936#endif
2937
/// Lowers the incoming (formal) arguments of the function being compiled.
///
/// In order, this function:
///  - assigns a location to every incoming argument with CC_X86 (reserving a
///    32-byte shadow area for Win64, and running a second pass for
///    vectorcall);
///  - appends a value to \p InVals for every argument: register arguments are
///    copied out of live-in virtual registers, memory arguments are lowered
///    by LowerMemArgument, and Indirect arguments get an extra load;
///  - saves the sret pointer into a virtual register (except for Swift);
///  - creates the va_start frame index when the function has va_start, and on
///    64-bit vararg functions spills the still unallocated GPR/XMM argument
///    registers into a register save area;
///  - records registers to forward for vararg functions with musttail calls;
///  - records the number of bytes to pop on return and the argument stack
///    size in X86MachineFunctionInfo;
///  - allocates the PSPSym stack slot for the CoreCLR EH personality;
///  - disables callee-saved handling of live-in registers for regcall or
///    "no_caller_saved_registers" functions.
///
/// \param Chain    Incoming chain.
/// \param CallConv Calling convention of the function.
/// \param isVarArg Whether the function is variadic.
/// \param Ins      Incoming argument descriptors.
/// \param dl       Debug location attached to the created nodes.
/// \param DAG      DAG the nodes are created in.
/// \param InVals   Output: receives the lowered argument values.
/// \returns the updated chain.
2938SDValue X86TargetLowering::LowerFormalArguments(
2939 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2940 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2941 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2942 MachineFunction &MF = DAG.getMachineFunction();
2943 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2944 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
2945
2946 const Function &F = MF.getFunction();
 // An externally visible "main" on Cygwin/MinGW targets always gets a frame
 // pointer.
2947 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
2948 F.getName() == "main")
2949 FuncInfo->setForceFramePointer(true);
2950
2951 MachineFrameInfo &MFI = MF.getFrameInfo();
2952 bool Is64Bit = Subtarget.is64Bit();
2953 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2954
2955 assert((static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2957, __extension__ __PRETTY_FUNCTION__))
2956 !(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2957, __extension__ __PRETTY_FUNCTION__))
2957 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2957, __extension__ __PRETTY_FUNCTION__))
;
2958
 // Interrupt handlers take either one argument, or two where the second has
 // the native integer width (i64 on 64-bit, i32 otherwise).
2959 if (CallConv == CallingConv::X86_INTR) {
2960 bool isLegal = Ins.size() == 1 ||
2961 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
2962 (!Is64Bit && Ins[1].VT == MVT::i32)));
2963 if (!isLegal)
2964 report_fatal_error("X86 interrupts may take one or two arguments");
2965 }
2966
2967 // Assign locations to all of the incoming arguments.
2968 SmallVector<CCValAssign, 16> ArgLocs;
2969 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2970
2971 // Allocate shadow area for Win64.
2972 if (IsWin64)
2973 CCInfo.AllocateStack(32, 8);
2974
2975 CCInfo.AnalyzeArguments(Ins, CC_X86);
2976
2977 // In vectorcall calling convention a second pass is required for the HVA
2978 // types.
2979 if (CallingConv::X86_VectorCall == CallConv) {
2980 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
2981 }
2982
2983 // The next loop assumes that the locations are in the same order of the
2984 // input arguments.
2985 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2986, __extension__ __PRETTY_FUNCTION__))
2986 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2986, __extension__ __PRETTY_FUNCTION__))
;
2987
 // I walks ArgLocs while InsIndex walks Ins; they diverge when one argument
 // consumes two locations (the custom v64i1 case below).
2988 SDValue ArgValue;
2989 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
2990 ++I, ++InsIndex) {
2991 assert(InsIndex < Ins.size() && "Invalid Ins index")(static_cast <bool> (InsIndex < Ins.size() &&
"Invalid Ins index") ? void (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2991, __extension__ __PRETTY_FUNCTION__))
;
2992 CCValAssign &VA = ArgLocs[I];
2993
2994 if (VA.isRegLoc()) {
2995 EVT RegVT = VA.getLocVT();
2996 if (VA.needsCustom()) {
2997 assert((static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2999, __extension__ __PRETTY_FUNCTION__))
2998 VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2999, __extension__ __PRETTY_FUNCTION__))
2999 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 2999, __extension__ __PRETTY_FUNCTION__))
;
3000
3001 // v64i1 values, in regcall calling convention, that are
3002 // compiled to 32 bit arch, are split up into two registers.
 // Note that this also consumes the next ArgLocs entry (++I).
3003 ArgValue =
3004 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3005 } else {
 // Pick the register class matching the location type.
3006 const TargetRegisterClass *RC;
3007 if (RegVT == MVT::i32)
3008 RC = &X86::GR32RegClass;
3009 else if (Is64Bit && RegVT == MVT::i64)
3010 RC = &X86::GR64RegClass;
3011 else if (RegVT == MVT::f32)
3012 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3013 else if (RegVT == MVT::f64)
3014 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3015 else if (RegVT == MVT::f80)
3016 RC = &X86::RFP80RegClass;
3017 else if (RegVT == MVT::f128)
3018 RC = &X86::FR128RegClass;
3019 else if (RegVT.is512BitVector())
3020 RC = &X86::VR512RegClass;
3021 else if (RegVT.is256BitVector())
3022 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3023 else if (RegVT.is128BitVector())
3024 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3025 else if (RegVT == MVT::x86mmx)
3026 RC = &X86::VR64RegClass;
3027 else if (RegVT == MVT::v1i1)
3028 RC = &X86::VK1RegClass;
3029 else if (RegVT == MVT::v8i1)
3030 RC = &X86::VK8RegClass;
3031 else if (RegVT == MVT::v16i1)
3032 RC = &X86::VK16RegClass;
3033 else if (RegVT == MVT::v32i1)
3034 RC = &X86::VK32RegClass;
3035 else if (RegVT == MVT::v64i1)
3036 RC = &X86::VK64RegClass;
3037 else
3038 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3038)
;
3039
3040 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3041 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3042 }
3043
3044 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3045 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3046 // right size.
3047 if (VA.getLocInfo() == CCValAssign::SExt)
3048 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3049 DAG.getValueType(VA.getValVT()));
3050 else if (VA.getLocInfo() == CCValAssign::ZExt)
3051 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3052 DAG.getValueType(VA.getValVT()));
3053 else if (VA.getLocInfo() == CCValAssign::BCvt)
3054 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3055
3056 if (VA.isExtInLoc()) {
3057 // Handle MMX values passed in XMM regs.
3058 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3059 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3060 else if (VA.getValVT().isVector() &&
3061 VA.getValVT().getScalarType() == MVT::i1 &&
3062 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3063 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3064 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3065 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3066 } else
3067 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3068 }
3069 } else {
3070 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3070, __extension__ __PRETTY_FUNCTION__))
;
3071 ArgValue =
3072 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3073 }
3074
3075 // If value is passed via pointer - do a load.
3076 if (VA.getLocInfo() == CCValAssign::Indirect)
3077 ArgValue =
3078 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3079
3080 InVals.push_back(ArgValue);
3081 }
3082
3083 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3084 // Swift calling convention does not require we copy the sret argument
3085 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3086 if (CallConv == CallingConv::Swift)
3087 continue;
3088
3089 // All x86 ABIs require that for returning structs by value we copy the
3090 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3091 // the argument into a virtual register so that we can access it from the
3092 // return points.
3093 if (Ins[I].Flags.isSRet()) {
3094 unsigned Reg = FuncInfo->getSRetReturnReg();
3095 if (!Reg) {
3096 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3097 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3098 FuncInfo->setSRetReturnReg(Reg);
3099 }
3100 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3101 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
 // Only the first sret argument is handled.
3102 break;
3103 }
3104 }
3105
3106 unsigned StackSize = CCInfo.getNextStackOffset();
3107 // Align stack specially for tail calls.
3108 if (shouldGuaranteeTCO(CallConv,
3109 MF.getTarget().Options.GuaranteedTailCallOpt))
3110 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3111
3112 // If the function takes variable number of arguments, make a frame index for
3113 // the start of the first vararg value... for expansion of llvm.va_start. We
3114 // can skip this if there are no va_start calls.
3115 if (MFI.hasVAStart() &&
3116 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3117 CallConv != CallingConv::X86_ThisCall))) {
3118 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3119 }
3120
3121 // Figure out if XMM registers are in use.
3122 assert(!(Subtarget.useSoftFloat() &&(static_cast <bool> (!(Subtarget.useSoftFloat() &&
F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3124, __extension__ __PRETTY_FUNCTION__))
3123 F.hasFnAttribute(Attribute::NoImplicitFloat)) &&(static_cast <bool> (!(Subtarget.useSoftFloat() &&
F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3124, __extension__ __PRETTY_FUNCTION__))
3124 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(Subtarget.useSoftFloat() &&
F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3124, __extension__ __PRETTY_FUNCTION__))
;
3125
3126 // 64-bit calling conventions support varargs and register parameters, so we
3127 // have to do extra work to spill them in the prologue.
3128 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3129 // Find the first unallocated argument registers.
3130 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3131 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3132 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3133 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3134 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3135, __extension__ __PRETTY_FUNCTION__))
3135 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3135, __extension__ __PRETTY_FUNCTION__))
;
3136
3137 // Gather all the live in physical registers.
3138 SmallVector<SDValue, 6> LiveGPRs;
3139 SmallVector<SDValue, 8> LiveXMMRegs;
3140 SDValue ALVal;
3141 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3142 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3143 LiveGPRs.push_back(
3144 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3145 }
 // AL is made live-in alongside the remaining XMM argument registers; its
 // value is passed to VASTART_SAVE_XMM_REGS below.
3146 if (!ArgXMMs.empty()) {
3147 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3148 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3149 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3150 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3151 LiveXMMRegs.push_back(
3152 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3153 }
3154 }
3155
3156 if (IsWin64) {
3157 // Get to the caller-allocated home save location. Add 8 to account
3158 // for the return address.
3159 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3160 FuncInfo->setRegSaveFrameIndex(
3161 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3162 // Fixup to set vararg frame on shadow area (4 x i64).
3163 if (NumIntRegs < 4)
3164 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3165 } else {
3166 // For X86-64, if there are vararg parameters that are passed via
3167 // registers, then we must store them to their spots on the stack so
3168 // they may be loaded by dereferencing the result of va_next.
3169 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3170 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3171 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3172 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3173 }
3174
3175 // Store the integer parameter registers.
3176 SmallVector<SDValue, 8> MemOps;
3177 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3178 getPointerTy(DAG.getDataLayout()));
3179 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3180 for (SDValue Val : LiveGPRs) {
3181 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3182 RSFIN, DAG.getIntPtrConstant(Offset, dl));
 // Each store is chained on result 1 of the register copy it stores.
3183 SDValue Store =
3184 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3185 MachinePointerInfo::getFixedStack(
3186 DAG.getMachineFunction(),
3187 FuncInfo->getRegSaveFrameIndex(), Offset));
3188 MemOps.push_back(Store);
3189 Offset += 8;
3190 }
3191
3192 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3193 // Now store the XMM (fp + vector) parameter registers.
 // Operands: chain, AL value, register save frame index, FP offset, then
 // the live XMM register values.
3194 SmallVector<SDValue, 12> SaveXMMOps;
3195 SaveXMMOps.push_back(Chain);
3196 SaveXMMOps.push_back(ALVal);
3197 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3198 FuncInfo->getRegSaveFrameIndex(), dl));
3199 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3200 FuncInfo->getVarArgsFPOffset(), dl));
3201 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3202 LiveXMMRegs.end());
3203 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3204 MVT::Other, SaveXMMOps));
3205 }
3206
3207 if (!MemOps.empty())
3208 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3209 }
3210
3211 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3212 // Find the largest legal vector type.
3213 MVT VecVT = MVT::Other;
3214 // FIXME: Only some x86_32 calling conventions support AVX512.
3215 if (Subtarget.hasAVX512() &&
3216 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3217 CallConv == CallingConv::Intel_OCL_BI)))
3218 VecVT = MVT::v16f32;
3219 else if (Subtarget.hasAVX())
3220 VecVT = MVT::v8f32;
3221 else if (Subtarget.hasSSE2())
3222 VecVT = MVT::v4f32;
3223
3224 // We forward some GPRs and some vector types.
3225 SmallVector<MVT, 2> RegParmTypes;
3226 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3227 RegParmTypes.push_back(IntVT);
3228 if (VecVT != MVT::Other)
3229 RegParmTypes.push_back(VecVT);
3230
3231 // Compute the set of forwarded registers. The rest are scratch.
3232 SmallVectorImpl<ForwardedRegister> &Forwards =
3233 FuncInfo->getForwardedMustTailRegParms();
3234 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3235
3236 // Conservatively forward AL on x86_64, since it might be used for varargs.
3237 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3238 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3239 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3240 }
3241
3242 // Copy all forwards from physical to virtual registers.
 // F.VReg is replaced by a fresh virtual register that receives the copy.
3243 for (ForwardedRegister &F : Forwards) {
3244 // FIXME: Can we use a less constrained schedule?
3245 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3246 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
3247 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
3248 }
3249 }
3250
3251 // Some CCs need callee pop.
3252 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3253 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3254 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3255 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3256 // X86 interrupts must pop the error code (and the alignment padding) if
3257 // present.
3258 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3259 } else {
3260 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3261 // If this is an sret function, the return should pop the hidden pointer.
3262 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3263 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3264 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3265 FuncInfo->setBytesToPopOnReturn(4);
3266 }
3267
3268 if (!Is64Bit) {
3269 // RegSaveFrameIndex is X86-64 only.
 // NOTE(review): 0xAAAAAAA looks like a recognizable placeholder for a
 // frame index that must not be used — confirm.
3270 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3271 if (CallConv == CallingConv::X86_FastCall ||
3272 CallConv == CallingConv::X86_ThisCall)
3273 // fastcc functions can't have varargs.
3274 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3275 }
3276
3277 FuncInfo->setArgumentStackSize(StackSize);
3278
3279 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3280 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3281 if (Personality == EHPersonality::CoreCLR) {
3282 assert(Is64Bit)(static_cast <bool> (Is64Bit) ? void (0) : __assert_fail
("Is64Bit", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3282, __extension__ __PRETTY_FUNCTION__))
;
3283 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3284 // that we'd prefer this slot be allocated towards the bottom of the frame
3285 // (i.e. near the stack pointer after allocating the frame). Every
3286 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3287 // offset from the bottom of this and each funclet's frame must be the
3288 // same, so the size of funclets' (mostly empty) frames is dictated by
3289 // how far this slot is from the bottom (since they allocate just enough
3290 // space to accommodate holding this slot at the correct offset).
3291 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3292 EHInfo->PSPSymFrameIdx = PSPSymFI;
3293 }
3294 }
3295
 // For regcall and "no_caller_saved_registers" functions, every live-in
 // physical register is removed from the callee-saved set.
3296 if (CallConv == CallingConv::X86_RegCall ||
3297 F.hasFnAttribute("no_caller_saved_registers")) {
3298 MachineRegisterInfo &MRI = MF.getRegInfo();
3299 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3300 MRI.disableCalleeSavedRegister(Pair.first);
3301 }
3302
3303 return Chain;
3304}
3305
/// Lower one outgoing call argument that was assigned a memory location.
///
/// The destination address is computed as StackPtr + VA.getLocMemOffset().
/// A byval argument is copied to that address through
/// CreateCopyOfByValArgument; any other argument is written with a store.
///
/// \param Chain    Chain operand handed to the copy or the store.
/// \param StackPtr Base address the location offset is added to.
/// \param Arg      Value to store (or source of the byval copy).
/// \param VA       Location assignment; only its memory offset is read here.
/// \param Flags    Argument flags; only isByVal() is consulted here.
/// \returns the node produced by the byval copy or by the store. LowerCall
///          collects these results in its MemOpChains list.
3306SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3307 SDValue Arg, const SDLoc &dl,
3308 SelectionDAG &DAG,
3309 const CCValAssign &VA,
3310 ISD::ArgFlagsTy Flags) const {
3311 unsigned LocMemOffset = VA.getLocMemOffset();
// Destination address = StackPtr + LocMemOffset, in the target pointer type.
3312 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3313 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3314 StackPtr, PtrOff);
// Byval arguments are copied as a whole instead of being stored as a value.
3315 if (Flags.isByVal())
3316 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3317
3318 return DAG.getStore(
3319 Chain, dl, Arg, PtrOff,
3320 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3321}
3322
3323/// Emit a load of return address if tail call
3324/// optimization is performed and it is required.
///
/// \param[out] OutRetAddr Set to the loaded return address value (it is first
///                        assigned the return-address frame index, which is
///                        then used as the load address).
/// \param Chain           Chain operand of the emitted load.
/// \returns result number 1 of the load node (presumably the load's output
///          chain -- confirm against SelectionDAG::getLoad).
///
/// NOTE(review): IsTailCall, Is64Bit and FPDiff are not read in this body;
/// the load is emitted unconditionally, so the caller is responsible for the
/// "if required" part of the contract.
3325SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3326 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3327 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3328 // Adjust the Return address stack slot.
3329 EVT VT = getPointerTy(DAG.getDataLayout());
3330 OutRetAddr = getReturnAddressFrameIndex(DAG);
3331
3332 // Load the "old" Return address.
3333 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3334 return SDValue(OutRetAddr.getNode(), 1);
3335}
3336
3337/// Emit a store of the return address if tail call
3338/// optimization is performed and it is required (FPDiff!=0).
///
/// \param Chain        Incoming chain; returned unchanged when FPDiff is 0.
/// \param RetAddrFrIdx Value that is stored into the new slot.
/// \param PtrVT        Value type used for the new frame index node.
/// \param SlotSize     Size in bytes of the fixed object that is created.
/// \param FPDiff       Displacement of the return address slot; the new
///                     object is created at offset FPDiff - SlotSize.
/// \returns the chain of the emitted store, or \p Chain if no store is needed.
3339static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3340 SDValue Chain, SDValue RetAddrFrIdx,
3341 EVT PtrVT, unsigned SlotSize,
3342 int FPDiff, const SDLoc &dl) {
3343 // Store the return address to the appropriate stack slot.
3344 if (!FPDiff) return Chain;
3345 // Calculate the new stack slot for the return address.
// FPDiff is widened to int64_t before SlotSize is subtracted.
3346 int NewReturnAddrFI =
3347 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3348 false);
3349 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3350 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3351 MachinePointerInfo::getFixedStack(
3352 DAG.getMachineFunction(), NewReturnAddrFI));
3353 return Chain;
3354}
3355
3356/// Returns a vector_shuffle mask for an movs{s|d}, movd
3357/// operation of specified width.
///
/// More precisely, this returns the shuffle node itself, built from V1 and V2
/// with the mask <NumElems, 1, 2, ..., NumElems-1>, where NumElems is the
/// element count of \p VT. Under the usual two-input shuffle numbering
/// (presumably index NumElems selects element 0 of V2 -- confirm against
/// SelectionDAG::getVectorShuffle) the lowest lane comes from V2 and all
/// other lanes come from V1.
3358static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3359 SDValue V2) {
3360 unsigned NumElems = VT.getVectorNumElements();
3361 SmallVector<int, 8> Mask;
// Lane 0 gets index NumElems; every other lane i keeps index i.
3362 Mask.push_back(NumElems);
3363 for (unsigned i = 1; i != NumElems; ++i)
3364 Mask.push_back(i);
3365 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3366}
3367
/// Lower the outgoing call described by \p CLI into SelectionDAG nodes.
///
/// The visible steps, in order:
///  - Decide whether the call is lowered as a tail call and whether it is a
///    sibcall. The decision is written back to CLI.IsTailCall through the
///    isTailCall reference.
///  - Assign a location to every outgoing argument with CC_X86 (plus a
///    second pass for the vectorcall convention).
///  - Open the call sequence (skipped for sibcalls), then extend, bitcast or
///    spill each argument; register arguments are queued in RegsToPass and
///    memory arguments are stored through LowerMemOpCallTo.
///  - For tail calls that are not sibcalls, store memory arguments into fixed
///    stack objects displaced by FPDiff and re-store the return address.
///  - Rewrite the callee into a target node where applicable and build the
///    operand list: chain, callee, FPDiff (tail calls only), argument
///    registers, register mask and optional glue.
///  - Emit X86ISD::TC_RETURN for tail calls; otherwise emit X86ISD::CALL,
///    close the call sequence and hand over to LowerCallResult.
///
/// Calls report_fatal_error for the X86_INTR calling convention and for
/// invalid uses of the inalloca attribute.
///
/// \param CLI    Call description; CLI.IsTailCall may be modified.
/// \param InVals Forwarded to LowerCallResult (presumably receives the
///               lowered result values -- confirm against LowerCallResult).
/// \returns the TC_RETURN node for tail calls, otherwise whatever
///          LowerCallResult returns.
3368SDValue
3369X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3370 SmallVectorImpl<SDValue> &InVals) const {
3371 SelectionDAG &DAG = CLI.DAG;
3372 SDLoc &dl = CLI.DL;
3373 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3374 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3375 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3376 SDValue Chain = CLI.Chain;
3377 SDValue Callee = CLI.Callee;
3378 CallingConv::ID CallConv = CLI.CallConv;
3379 bool &isTailCall = CLI.IsTailCall;
3380 bool isVarArg = CLI.IsVarArg;
3381
3382 MachineFunction &MF = DAG.getMachineFunction();
3383 bool Is64Bit = Subtarget.is64Bit();
3384 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3385 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3386 bool IsSibcall = false;
3387 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3388 auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
3389 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3390 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
// HasNCSR is set when either the call instruction or the statically known
// callee carries the no_caller_saved_registers attribute.
3391 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3392 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3393
3394 if (CallConv == CallingConv::X86_INTR)
3395 report_fatal_error("X86 interrupts may not be called directly");
3396
// The disable-tail-calls attribute of the calling function vetoes tail calls
// (a musttail call below still forces isTailCall back to true).
3397 if (Attr.getValueAsString() == "true")
3398 isTailCall = false;
3399
3400 if (Subtarget.isPICStyleGOT() &&
3401 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3402 // If we are using a GOT, disable tail calls to external symbols with
3403 // default visibility. Tail calling such a symbol requires using a GOT
3404 // relocation, which forces early binding of the symbol. This breaks code
3405 // that require lazy function symbol resolution. Using musttail or
3406 // GuaranteedTailCallOpt will override this.
3407 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3408 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3409 G->getGlobal()->hasDefaultVisibility()))
3410 isTailCall = false;
3411 }
3412
3413 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3414 if (IsMustTail) {
3415 // Force this to be a tail call. The verifier rules are enough to ensure
3416 // that we can lower this successfully without moving the return address
3417 // around.
3418 isTailCall = true;
3419 } else if (isTailCall) {
3420 // Check if it's really possible to do a tail call.
3421 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3422 isVarArg, SR != NotStructReturn,
3423 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3424 Outs, OutVals, Ins, DAG);
3425
3426 // Sibcalls are automatically detected tailcalls which do not require
3427 // ABI changes.
3428 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3429 IsSibcall = true;
3430
3431 if (isTailCall)
3432 ++NumTailCalls;
3433 }
3434
3435 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3436, __extension__ __PRETTY_FUNCTION__))
3436 "Var args not supported with calling convention fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3436, __extension__ __PRETTY_FUNCTION__))
;
3437
3438 // Analyze operands of the call, assigning locations to each operand.
3439 SmallVector<CCValAssign, 16> ArgLocs;
3440 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3441
3442 // Allocate shadow area for Win64.
3443 if (IsWin64)
3444 CCInfo.AllocateStack(32, 8);
3445
3446 CCInfo.AnalyzeArguments(Outs, CC_X86);
3447
3448 // In vectorcall calling convention a second pass is required for the HVA
3449 // types.
3450 if (CallingConv::X86_VectorCall == CallConv) {
3451 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3452 }
3453
3454 // Get a count of how many bytes are to be pushed on the stack.
3455 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3456 if (IsSibcall)
3457 // This is a sibcall. The memory operands are available in caller's
3458 // own caller's stack.
3459 NumBytes = 0;
3460 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3461 canGuaranteeTCO(CallConv))
3462 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3463
// FPDiff stays 0 for sibcalls, musttail calls and ordinary calls.
3464 int FPDiff = 0;
3465 if (isTailCall && !IsSibcall && !IsMustTail) {
3466 // Lower arguments at fp - stackoffset + fpdiff.
3467 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3468
3469 FPDiff = NumBytesCallerPushed - NumBytes;
3470
3471 // Set the delta of movement of the returnaddr stackslot.
3472 // But only set if delta is greater than previous delta.
3473 if (FPDiff < X86Info->getTCReturnAddrDelta())
3474 X86Info->setTCReturnAddrDelta(FPDiff);
3475 }
3476
3477 unsigned NumBytesToPush = NumBytes;
3478 unsigned NumBytesToPop = NumBytes;
3479
3480 // If we have an inalloca argument, all stack space has already been allocated
3481 // for us and be right at the top of the stack. We don't support multiple
3482 // arguments passed in memory when using inalloca.
3483 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3484 NumBytesToPush = 0;
3485 if (!ArgLocs.back().isMemLoc())
3486 report_fatal_error("cannot use inalloca attribute on a register "
3487 "parameter");
3488 if (ArgLocs.back().getLocMemOffset() != 0)
3489 report_fatal_error("any parameter with the inalloca attribute must be "
3490 "the only memory argument");
3491 }
3492
3493 if (!IsSibcall)
3494 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3495 NumBytes - NumBytesToPush, dl);
3496
3497 SDValue RetAddrFrIdx;
3498 // Load return address for tail calls.
3499 if (isTailCall && FPDiff)
3500 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3501 Is64Bit, FPDiff, dl);
3502
3503 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3504 SmallVector<SDValue, 8> MemOpChains;
3505 SDValue StackPtr;
3506
3507 // The next loop assumes that the locations are in the same order of the
3508 // input arguments.
3509 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3510, __extension__ __PRETTY_FUNCTION__))
3510 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3510, __extension__ __PRETTY_FUNCTION__))
;
3511
3512 // Walk the register/memloc assignments, inserting copies/loads. In the case
3513 // of tail call optimization arguments are handled later.
// I indexes ArgLocs and OutIndex indexes Outs/OutVals. They advance together,
// except that the custom (split v64i1) case consumes one extra ArgLocs entry.
3514 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3515 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3516 ++I, ++OutIndex) {
3517 assert(OutIndex < Outs.size() && "Invalid Out index")(static_cast <bool> (OutIndex < Outs.size() &&
"Invalid Out index") ? void (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3517, __extension__ __PRETTY_FUNCTION__))
;
3518 // Skip inalloca arguments, they have already been written.
3519 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3520 if (Flags.isInAlloca())
3521 continue;
3522
3523 CCValAssign &VA = ArgLocs[I];
3524 EVT RegVT = VA.getLocVT();
3525 SDValue Arg = OutVals[OutIndex];
3526 bool isByVal = Flags.isByVal();
3527
3528 // Promote the value if needed.
3529 switch (VA.getLocInfo()) {
3530 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3530)
;
3531 case CCValAssign::Full: break;
3532 case CCValAssign::SExt:
3533 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3534 break;
3535 case CCValAssign::ZExt:
3536 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3537 break;
3538 case CCValAssign::AExt:
3539 if (Arg.getValueType().isVector() &&
3540 Arg.getValueType().getVectorElementType() == MVT::i1)
3541 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3542 else if (RegVT.is128BitVector()) {
3543 // Special case: passing MMX values in XMM registers.
3544 Arg = DAG.getBitcast(MVT::i64, Arg);
3545 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3546 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3547 } else
3548 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3549 break;
3550 case CCValAssign::BCvt:
3551 Arg = DAG.getBitcast(RegVT, Arg);
3552 break;
3553 case CCValAssign::Indirect: {
3554 // Store the argument.
// The value is spilled to a stack temporary and the address of that
// temporary is passed in its place.
3555 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3556 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3557 Chain = DAG.getStore(
3558 Chain, dl, Arg, SpillSlot,
3559 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3560 Arg = SpillSlot;
3561 break;
3562 }
3563 }
3564
3565 if (VA.needsCustom()) {
3566 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3567, __extension__ __PRETTY_FUNCTION__))
3567 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3567, __extension__ __PRETTY_FUNCTION__))
;
3568 // Split v64i1 value into two registers
3569 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3570 Subtarget);
3571 } else if (VA.isRegLoc()) {
3572 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3573 if (isVarArg && IsWin64) {
3574 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3575 // shadow reg if callee is a varargs function.
3576 unsigned ShadowReg = 0;
3577 switch (VA.getLocReg()) {
3578 case X86::XMM0: ShadowReg = X86::RCX; break;
3579 case X86::XMM1: ShadowReg = X86::RDX; break;
3580 case X86::XMM2: ShadowReg = X86::R8; break;
3581 case X86::XMM3: ShadowReg = X86::R9; break;
3582 }
3583 if (ShadowReg)
3584 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3585 }
3586 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3587 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3587, __extension__ __PRETTY_FUNCTION__))
;
// The stack pointer copy is created lazily, on the first memory argument.
3588 if (!StackPtr.getNode())
3589 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3590 getPointerTy(DAG.getDataLayout()));
3591 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3592 dl, DAG, VA, Flags));
3593 }
3594 }
3595
3596 if (!MemOpChains.empty())
3597 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3598
3599 if (Subtarget.isPICStyleGOT()) {
3600 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3601 // GOT pointer.
3602 if (!isTailCall) {
3603 RegsToPass.push_back(std::make_pair(
3604 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3605 getPointerTy(DAG.getDataLayout()))));
3606 } else {
3607 // If we are tail calling and generating PIC/GOT style code load the
3608 // address of the callee into ECX. The value in ecx is used as target of
3609 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3610 // for tail calls on PIC/GOT architectures. Normally we would just put the
3611 // address of GOT into ebx and then call target@PLT. But for tail calls
3612 // ebx would be restored (since ebx is callee saved) before jumping to the
3613 // target@PLT.
3614
3615 // Note: The actual moving to ECX is done further down.
3616 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3617 if (G && !G->getGlobal()->hasLocalLinkage() &&
3618 G->getGlobal()->hasDefaultVisibility())
3619 Callee = LowerGlobalAddress(Callee, DAG);
3620 else if (isa<ExternalSymbolSDNode>(Callee))
3621 Callee = LowerExternalSymbol(Callee, DAG);
3622 }
3623 }
3624
3625 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3626 // From AMD64 ABI document:
3627 // For calls that may call functions that use varargs or stdargs
3628 // (prototype-less calls or calls to functions containing ellipsis (...) in
3629 // the declaration) %al is used as hidden argument to specify the number
3630 // of SSE registers used. The contents of %al do not need to match exactly
3631 // the number of registers, but must be an upper bound on the number of SSE
3632 // registers used and is in the range 0 - 8 inclusive.
3633
3634 // Count the number of XMM registers allocated.
3635 static const MCPhysReg XMMArgRegs[] = {
3636 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3637 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3638 };
3639 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3640 assert((Subtarget.hasSSE1() || !NumXMMRegs)(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3641, __extension__ __PRETTY_FUNCTION__))
3641 && "SSE registers cannot be used when SSE is disabled")(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3641, __extension__ __PRETTY_FUNCTION__))
;
3642
3643 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3644 DAG.getConstant(NumXMMRegs, dl,
3645 MVT::i8)));
3646 }
3647
// For variadic musttail calls, every register recorded by
// getForwardedMustTailRegParms is re-read from its virtual register and
// passed on in its physical register.
3648 if (isVarArg && IsMustTail) {
3649 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3650 for (const auto &F : Forwards) {
3651 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3652 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3653 }
3654 }
3655
3656 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3657 // don't need this because the eligibility check rejects calls that require
3658 // shuffling arguments passed in memory.
3659 if (!IsSibcall && isTailCall) {
3660 // Force all the incoming stack arguments to be loaded from the stack
3661 // before any new outgoing arguments are stored to the stack, because the
3662 // outgoing stack slots may alias the incoming argument stack slots, and
3663 // the alias isn't otherwise explicit. This is slightly more conservative
3664 // than necessary, because it means that each store effectively depends
3665 // on every argument instead of just those arguments it would clobber.
3666 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3667
3668 SmallVector<SDValue, 8> MemOpChains2;
3669 SDValue FIN;
3670 int FI = 0;
3671 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3672 ++I, ++OutsIndex) {
3673 CCValAssign &VA = ArgLocs[I];
3674
3675 if (VA.isRegLoc()) {
3676 if (VA.needsCustom()) {
3677 assert((CallConv == CallingConv::X86_RegCall) &&(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3678, __extension__ __PRETTY_FUNCTION__))
3678 "Expecting custom case only in regcall calling convention")(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3678, __extension__ __PRETTY_FUNCTION__))
;
3679 // This means that we are in special case where one argument was
3680 // passed through two register locations - Skip the next location
3681 ++I;
3682 }
3683
3684 continue;
3685 }
3686
3687 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3687, __extension__ __PRETTY_FUNCTION__))
;
3688 SDValue Arg = OutVals[OutsIndex];
3689 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3690 // Skip inalloca arguments. They don't require any work.
3691 if (Flags.isInAlloca())
3692 continue;
3693 // Create frame index.
// The object size is the location type's bit width rounded up to bytes.
3694 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3695 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3696 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3697 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3698
3699 if (Flags.isByVal()) {
3700 // Copy relative to framepointer.
3701 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3702 if (!StackPtr.getNode())
3703 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3704 getPointerTy(DAG.getDataLayout()));
3705 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3706 StackPtr, Source);
3707
3708 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3709 ArgChain,
3710 Flags, DAG, dl));
3711 } else {
3712 // Store relative to framepointer.
3713 MemOpChains2.push_back(DAG.getStore(
3714 ArgChain, dl, Arg, FIN,
3715 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3716 }
3717 }
3718
3719 if (!MemOpChains2.empty())
3720 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3721
3722 // Store the return address to the appropriate stack slot.
3723 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3724 getPointerTy(DAG.getDataLayout()),
3725 RegInfo->getSlotSize(), FPDiff, dl);
3726 }
3727
3728 // Build a sequence of copy-to-reg nodes chained together with token chain
3729 // and flag operands which copy the outgoing args into registers.
3730 SDValue InFlag;
3731 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3732 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3733 RegsToPass[i].second, InFlag);
3734 InFlag = Chain.getValue(1);
3735 }
3736
3737 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3738 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")(static_cast <bool> (Is64Bit && "Large code model is only legal in 64-bit mode."
) ? void (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3738, __extension__ __PRETTY_FUNCTION__))
;
3739 // In the 64-bit large code model, we have to make all calls
3740 // through a register, since the call instruction's 32-bit
3741 // pc-relative offset may not be large enough to hold the whole
3742 // address.
3743 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3744 // If the callee is a GlobalAddress node (quite common, every direct call
3745 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3746 // it.
3747 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3748
3749 // We should use extra load for direct calls to dllimported functions in
3750 // non-JIT mode.
3751 const GlobalValue *GV = G->getGlobal();
3752 if (!GV->hasDLLImportStorageClass()) {
3753 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
3754
3755 Callee = DAG.getTargetGlobalAddress(
3756 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3757
3758 if (OpFlags == X86II::MO_GOTPCREL) {
3759 // Add a wrapper.
3760 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3761 getPointerTy(DAG.getDataLayout()), Callee);
3762 // Add extra indirection
3763 Callee = DAG.getLoad(
3764 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3765 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3766 }
3767 }
3768 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3769 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
3770 unsigned char OpFlags =
3771 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
3772
3773 Callee = DAG.getTargetExternalSymbol(
3774 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3775 } else if (Subtarget.isTarget64BitILP32() &&
3776 Callee->getValueType(0) == MVT::i32) {
3777 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3778 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3779 }
3780
3781 // Returns a chain & a flag for retval copy to use.
3782 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3783 SmallVector<SDValue, 8> Ops;
3784
// For tail calls that are not sibcalls the call sequence is closed here,
// before the TC_RETURN node is built.
3785 if (!IsSibcall && isTailCall) {
3786 Chain = DAG.getCALLSEQ_END(Chain,
3787 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3788 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3789 InFlag = Chain.getValue(1);
3790 }
3791
3792 Ops.push_back(Chain);
3793 Ops.push_back(Callee);
3794
3795 if (isTailCall)
3796 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3797
3798 // Add argument registers to the end of the list so that they are known live
3799 // into the call.
3800 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3801 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3802 RegsToPass[i].second.getValueType()));
3803
3804 // Add a register mask operand representing the call-preserved registers.
3805 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
3806 // set X86_INTR calling convention because it has the same CSR mask
3807 // (same preserved registers).
3808 const uint32_t *Mask = RegInfo->getCallPreservedMask(
3809 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
3810 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 3810, __extension__ __PRETTY_FUNCTION__))
;
3811
3812 // If this is an invoke in a 32-bit function using a funclet-based
3813 // personality, assume the function clobbers all registers. If an exception
3814 // is thrown, the runtime will not restore CSRs.
3815 // FIXME: Model this more precisely so that we can register allocate across
3816 // the normal edge and spill and fill across the exceptional edge.
3817 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
3818 const Function &CallerFn = MF.getFunction();
3819 EHPersonality Pers =
3820 CallerFn.hasPersonalityFn()
3821 ? classifyEHPersonality(CallerFn.getPersonalityFn())
3822 : EHPersonality::Unknown;
3823 if (isFuncletEHPersonality(Pers))
3824 Mask = RegInfo->getNoPreservedMask();
3825 }
3826
3827 // Define a new register mask from the existing mask.
// RegMask stays null unless a per-call copy is built below; it is passed to
// LowerCallResult in either state.
3828 uint32_t *RegMask = nullptr;
3829
3830 // In some calling conventions we need to remove the used physical registers
3831 // from the reg mask.
3832 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
3833 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3834
3835 // Allocate a new Reg Mask and copy Mask.
// The mask holds one bit per register, packed into 32-bit words.
3836 RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
3837 unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
3838 memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
3839
3840 // Make sure all sub registers of the argument registers are reset
3841 // in the RegMask.
3842 for (auto const &RegPair : RegsToPass)
3843 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
3844 SubRegs.isValid(); ++SubRegs)
3845 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3846
3847 // Create the RegMask Operand according to our updated mask.
3848 Ops.push_back(DAG.getRegisterMask(RegMask));
3849 } else {
3850 // Create the RegMask Operand according to the static mask.
3851 Ops.push_back(DAG.getRegisterMask(Mask));
3852 }
3853
3854 if (InFlag.getNode())
3855 Ops.push_back(InFlag);
3856
3857 if (isTailCall) {
3858 // We used to do:
3859 //// If this is the first return lowered for this function, add the regs
3860 //// to the liveout set for the function.
3861 // This isn't right, although it's probably harmless on x86; liveouts
3862 // should be computed from returns not tail calls. Consider a void
3863 // function making a tail call to a function returning int.
3864 MF.getFrameInfo().setHasTailCall();
3865 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
3866 }
3867
3868 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3869 InFlag = Chain.getValue(1);
3870
3871 // Create the CALLSEQ_END node.
3872 unsigned NumBytesForCalleeToPop;
3873 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3874 DAG.getTarget().Options.GuaranteedTailCallOpt))
3875 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3876 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3877 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3878 SR == StackStructReturn)
3879 // If this is a call to a struct-return function, the callee
3880 // pops the hidden struct pointer, so we have to push it back.
3881 // This is common for Darwin/X86, Linux & Mingw32 targets.
3882 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3883 NumBytesForCalleeToPop = 4;
3884 else
3885 NumBytesForCalleeToPop = 0; // Callee pops nothing.
3886
3887 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
3888 // No need to reset the stack after the call if the call doesn't return. To
3889 // make the MI verify, we'll pretend the callee does it for us.
3890 NumBytesForCalleeToPop = NumBytes;
3891 }
3892
3893 // Returns a flag for retval copy to use.
3894 if (!IsSibcall) {
3895 Chain = DAG.getCALLSEQ_END(Chain,
3896 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3897 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
3898 true),
3899 InFlag, dl);
3900 InFlag = Chain.getValue(1);
3901 }
3902
3903 // Handle result values, copying them out of physregs into vregs that we
3904 // return.
3905 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
3906 InVals, RegMask);
3907}
3908
3909//===----------------------------------------------------------------------===//
3910// Fast Calling Convention (tail call) implementation
3911//===----------------------------------------------------------------------===//
3912
3913// Like stdcall, the callee cleans up the arguments, except that ECX is
3914// reserved for storing the tail called function address. Only 2 registers are
3915// free for argument passing (inreg). Tail call optimization is performed
3916// provided:
3917// * tailcallopt is enabled
3918// * caller/callee are fastcc
3919// On X86_64 architecture with GOT-style position independent code only local
3920// (within module) calls are supported at the moment.
3921// To keep the stack aligned according to platform abi the function
3922// GetAlignedArgumentStackSize ensures that argument delta is always multiples
3923// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
3924// If a tail called function callee has more arguments than the caller the
3925// caller needs to make sure that there is room to move the RETADDR to. This is
3926// achieved by reserving an area the size of the argument delta right after the
3927// original RETADDR, but before the saved framepointer or the spilled registers
3928// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
3929// stack layout:
3930// arg1
3931// arg2
3932// RETADDR
3933// [ new RETADDR
3934// move area ]
3935// (possible EBP)
3936// ESI
3937// EDI
3938// local1 ..
3939
3940/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
3941/// requirement.
///
/// In general the result is the smallest value not less than \p StackSize of
/// the form StackAlignment * n + (StackAlignment - SlotSize), where
/// StackAlignment comes from the frame lowering and SlotSize from the
/// register info. The "16n + 12" above is the case of a 16-byte alignment
/// with 4-byte slots.
///
/// NOTE(review): StackAlignment - 1 is used as a bit mask, so this assumes
/// the stack alignment is a power of two -- confirm. \p DAG is not read.
///
/// \returns the adjusted size (computed as int64_t, returned as unsigned).
3942unsigned
3943X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
3944 SelectionDAG& DAG) const {
3945 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3946 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3947 unsigned StackAlignment = TFI.getStackAlignment();
3948 uint64_t AlignMask = StackAlignment - 1;
3949 int64_t Offset = StackSize;
3950 unsigned SlotSize = RegInfo->getSlotSize();
3951 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
3952 // The remainder is at most StackAlignment - SlotSize (12 in the example),
// so just add the difference.
3953 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
3954 } else {
3955 // Mask out lower bits, add stackalignment once plus the
// StackAlignment - SlotSize bytes (12 in the example).
3956 Offset = ((~AlignMask) & Offset) + StackAlignment +
3957 (StackAlignment-SlotSize);
3958 }
3959 return Offset;
3960}
3961
3962/// Return true if the given stack call argument is already available in the
3963/// same position (relatively) of the caller's incoming argument stack.
3964static
3965bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
3966 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
3967 const X86InstrInfo *TII, const CCValAssign &VA) {
3968 unsigned Bytes = Arg.getValueSizeInBits() / 8;
3969
3970 for (;;) {
3971 // Look through nodes that don't alter the bits of the incoming value.
3972 unsigned Op = Arg.getOpcode();
3973 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
3974 Arg = Arg.getOperand(0);
3975 continue;
3976 }
3977 if (Op == ISD::TRUNCATE) {
3978 const SDValue &TruncInput = Arg.getOperand(0);
3979 if (TruncInput.getOpcode() == ISD::AssertZext &&
3980 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
3981 Arg.getValueType()) {
3982 Arg = TruncInput.getOperand(0);
3983 continue;
3984 }
3985 }
3986 break;
3987 }
3988
3989 int FI = INT_MAX2147483647;
3990 if (Arg.getOpcode() == ISD::CopyFromReg) {
3991 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
3992 if (!TargetRegisterInfo::isVirtualRegister(VR))
3993 return false;
3994 MachineInstr *Def = MRI->getVRegDef(VR);
3995 if (!Def)
3996 return false;
3997 if (!Flags.isByVal()) {
3998 if (!TII->isLoadFromStackSlot(*Def, FI))
3999 return false;
4000 } else {
4001 unsigned Opcode = Def->getOpcode();
4002 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4003 Opcode == X86::LEA64_32r) &&
4004 Def->getOperand(1).isFI()) {
4005 FI = Def->getOperand(1).getIndex();
4006 Bytes = Flags.getByValSize();
4007 } else
4008 return false;
4009 }
4010 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4011 if (Flags.isByVal())
4012 // ByVal argument is passed in as a pointer but it's now being
4013 // dereferenced. e.g.
4014 // define @foo(%struct.X* %A) {
4015 // tail call @bar(%struct.X* byval %A)
4016 // }
4017 return false;
4018 SDValue Ptr = Ld->getBasePtr();
4019 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4020 if (!FINode)
4021 return false;
4022 FI = FINode->getIndex();
4023 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4024 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4025 FI = FINode->getIndex();
4026 Bytes = Flags.getByValSize();
4027 } else
4028 return false;
4029
4030 assert(FI != INT_MAX)(static_cast <bool> (FI != 2147483647) ? void (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4030, __extension__ __PRETTY_FUNCTION__))
;
4031 if (!MFI.isFixedObjectIndex(FI))
4032 return false;
4033
4034 if (Offset != MFI.getObjectOffset(FI))
4035 return false;
4036
4037 // If this is not byval, check that the argument stack object is immutable.
4038 // inalloca and argument copy elision can create mutable argument stack
4039 // objects. Byval objects can be mutated, but a byval call intends to pass the
4040 // mutated memory.
4041 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4042 return false;
4043
4044 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4045 // If the argument location is wider than the argument type, check that any
4046 // extension flags match.
4047 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4048 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4049 return false;
4050 }
4051 }
4052
4053 return Bytes == MFI.getObjectSize(FI);
4054}
4055
4056/// Check whether the call is eligible for tail call optimization. Targets
4057/// that want to do tail call optimization should implement this function.
4058bool X86TargetLowering::IsEligibleForTailCallOptimization(
4059 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4060 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4061 const SmallVectorImpl<ISD::OutputArg> &Outs,
4062 const SmallVectorImpl<SDValue> &OutVals,
4063 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4064 if (!mayTailCallThisCC(CalleeCC))
4065 return false;
4066
4067 // If -tailcallopt is specified, make fastcc functions tail-callable.
4068 MachineFunction &MF = DAG.getMachineFunction();
4069 const Function &CallerF = MF.getFunction();
4070
4071 // If the function return type is x86_fp80 and the callee return type is not,
4072 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4073 // perform a tailcall optimization here.
4074 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4075 return false;
4076
4077 CallingConv::ID CallerCC = CallerF.getCallingConv();
4078 bool CCMatch = CallerCC == CalleeCC;
4079 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4080 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4081
4082 // Win64 functions have extra shadow space for argument homing. Don't do the
4083 // sibcall if the caller and callee have mismatched expectations for this
4084 // space.
4085 if (IsCalleeWin64 != IsCallerWin64)
4086 return false;
4087
4088 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4089 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4090 return true;
4091 return false;
4092 }
4093
4094 // Look for obvious safe cases to perform tail call optimization that do not
4095 // require ABI changes. This is what gcc calls sibcall.
4096
4097 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4098 // emit a special epilogue.
4099 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4100 if (RegInfo->needsStackRealignment(MF))
4101 return false;
4102
4103 // Also avoid sibcall optimization if either caller or callee uses struct
4104 // return semantics.
4105 if (isCalleeStructRet || isCallerStructRet)
4106 return false;
4107
4108 // Do not sibcall optimize vararg calls unless all arguments are passed via
4109 // registers.
4110 LLVMContext &C = *DAG.getContext();
4111 if (isVarArg && !Outs.empty()) {
4112 // Optimizing for varargs on Win64 is unlikely to be safe without
4113 // additional testing.
4114 if (IsCalleeWin64 || IsCallerWin64)
4115 return false;
4116
4117 SmallVector<CCValAssign, 16> ArgLocs;
4118 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4119
4120 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4121 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4122 if (!ArgLocs[i].isRegLoc())
4123 return false;
4124 }
4125
4126 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4127 // stack. Therefore, if it's not used by the call it is not safe to optimize
4128 // this into a sibcall.
4129 bool Unused = false;
4130 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4131 if (!Ins[i].Used) {
4132 Unused = true;
4133 break;
4134 }
4135 }
4136 if (Unused) {
4137 SmallVector<CCValAssign, 16> RVLocs;
4138 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4139 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4140 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4141 CCValAssign &VA = RVLocs[i];
4142 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4143 return false;
4144 }
4145 }
4146
4147 // Check that the call results are passed in the same way.
4148 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4149 RetCC_X86, RetCC_X86))
4150 return false;
4151 // The callee has to preserve all registers the caller needs to preserve.
4152 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4153 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4154 if (!CCMatch) {
4155 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4156 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4157 return false;
4158 }
4159
4160 unsigned StackArgsSize = 0;
4161
4162 // If the callee takes no arguments then go on to check the results of the
4163 // call.
4164 if (!Outs.empty()) {
4165 // Check if stack adjustment is needed. For now, do not do this if any
4166 // argument is passed on the stack.
4167 SmallVector<CCValAssign, 16> ArgLocs;
4168 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4169
4170 // Allocate shadow area for Win64
4171 if (IsCalleeWin64)
4172 CCInfo.AllocateStack(32, 8);
4173
4174 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4175 StackArgsSize = CCInfo.getNextStackOffset();
4176
4177 if (CCInfo.getNextStackOffset()) {
4178 // Check if the arguments are already laid out in the right way as
4179 // the caller's fixed stack objects.
4180 MachineFrameInfo &MFI = MF.getFrameInfo();
4181 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4182 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4183 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4184 CCValAssign &VA = ArgLocs[i];
4185 SDValue Arg = OutVals[i];
4186 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4187 if (VA.getLocInfo() == CCValAssign::Indirect)
4188 return false;
4189 if (!VA.isRegLoc()) {
4190 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4191 MFI, MRI, TII, VA))
4192 return false;
4193 }
4194 }
4195 }
4196
4197 bool PositionIndependent = isPositionIndependent();
4198 // If the tailcall address may be in a register, then make sure it's
4199 // possible to register allocate for it. In 32-bit, the call address can
4200 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4201 // callee-saved registers are restored. These happen to be the same
4202 // registers used to pass 'inreg' arguments so watch out for those.
4203 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4204 !isa<ExternalSymbolSDNode>(Callee)) ||
4205 PositionIndependent)) {
4206 unsigned NumInRegs = 0;
4207 // In PIC we need an extra register to formulate the address computation
4208 // for the callee.
4209 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4210
4211 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4212 CCValAssign &VA = ArgLocs[i];
4213 if (!VA.isRegLoc())
4214 continue;
4215 unsigned Reg = VA.getLocReg();
4216 switch (Reg) {
4217 default: break;
4218 case X86::EAX: case X86::EDX: case X86::ECX:
4219 if (++NumInRegs == MaxInRegs)
4220 return false;
4221 break;
4222 }
4223 }
4224 }
4225
4226 const MachineRegisterInfo &MRI = MF.getRegInfo();
4227 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4228 return false;
4229 }
4230
4231 bool CalleeWillPop =
4232 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4233 MF.getTarget().Options.GuaranteedTailCallOpt);
4234
4235 if (unsigned BytesToPop =
4236 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4237 // If we have bytes to pop, the callee must pop them.
4238 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4239 if (!CalleePopMatches)
4240 return false;
4241 } else if (CalleeWillPop && StackArgsSize > 0) {
4242 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4243 return false;
4244 }
4245
4246 return true;
4247}
4248
4249FastISel *
4250X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4251 const TargetLibraryInfo *libInfo) const {
4252 return X86::createFastISel(funcInfo, libInfo);
4253}
4254
4255//===----------------------------------------------------------------------===//
4256// Other Lowering Hooks
4257//===----------------------------------------------------------------------===//
4258
4259static bool MayFoldLoad(SDValue Op) {
4260 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4261}
4262
4263static bool MayFoldIntoStore(SDValue Op) {
4264 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4265}
4266
4267static bool MayFoldIntoZeroExtend(SDValue Op) {
4268 if (Op.hasOneUse()) {
4269 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4270 return (ISD::ZERO_EXTEND == Opcode);
4271 }
4272 return false;
4273}
4274
4275static bool isTargetShuffle(unsigned Opcode) {
4276 switch(Opcode) {
4277 default: return false;
4278 case X86ISD::BLENDI:
4279 case X86ISD::PSHUFB:
4280 case X86ISD::PSHUFD:
4281 case X86ISD::PSHUFHW:
4282 case X86ISD::PSHUFLW:
4283 case X86ISD::SHUFP:
4284 case X86ISD::INSERTPS:
4285 case X86ISD::EXTRQI:
4286 case X86ISD::INSERTQI:
4287 case X86ISD::PALIGNR:
4288 case X86ISD::VSHLDQ:
4289 case X86ISD::VSRLDQ:
4290 case X86ISD::MOVLHPS:
4291 case X86ISD::MOVHLPS:
4292 case X86ISD::MOVLPS:
4293 case X86ISD::MOVLPD:
4294 case X86ISD::MOVSHDUP:
4295 case X86ISD::MOVSLDUP:
4296 case X86ISD::MOVDDUP:
4297 case X86ISD::MOVSS:
4298 case X86ISD::MOVSD:
4299 case X86ISD::UNPCKL:
4300 case X86ISD::UNPCKH:
4301 case X86ISD::VBROADCAST:
4302 case X86ISD::VPERMILPI:
4303 case X86ISD::VPERMILPV:
4304 case X86ISD::VPERM2X128:
4305 case X86ISD::VPERMIL2:
4306 case X86ISD::VPERMI:
4307 case X86ISD::VPPERM:
4308 case X86ISD::VPERMV:
4309 case X86ISD::VPERMV3:
4310 case X86ISD::VPERMIV3:
4311 case X86ISD::VZEXT_MOVL:
4312 return true;
4313 }
4314}
4315
4316static bool isTargetShuffleVariableMask(unsigned Opcode) {
4317 switch (Opcode) {
4318 default: return false;
4319 // Target Shuffles.
4320 case X86ISD::PSHUFB:
4321 case X86ISD::VPERMILPV:
4322 case X86ISD::VPERMIL2:
4323 case X86ISD::VPPERM:
4324 case X86ISD::VPERMV:
4325 case X86ISD::VPERMV3:
4326 case X86ISD::VPERMIV3:
4327 return true;
4328 // 'Faux' Target Shuffles.
4329 case ISD::AND:
4330 case X86ISD::ANDNP:
4331 return true;
4332 }
4333}
4334
4335SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4336 MachineFunction &MF = DAG.getMachineFunction();
4337 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4338 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4339 int ReturnAddrIndex = FuncInfo->getRAIndex();
4340
4341 if (ReturnAddrIndex == 0) {
4342 // Set up a frame object for the return address.
4343 unsigned SlotSize = RegInfo->getSlotSize();
4344 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4345 -(int64_t)SlotSize,
4346 false);
4347 FuncInfo->setRAIndex(ReturnAddrIndex);
4348 }
4349
4350 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4351}
4352
4353bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4354 bool hasSymbolicDisplacement) {
4355 // Offset should fit into 32 bit immediate field.
4356 if (!isInt<32>(Offset))
4357 return false;
4358
4359 // If we don't have a symbolic displacement - we don't have any extra
4360 // restrictions.
4361 if (!hasSymbolicDisplacement)
4362 return true;
4363
4364 // FIXME: Some tweaks might be needed for medium code model.
4365 if (M != CodeModel::Small && M != CodeModel::Kernel)
4366 return false;
4367
4368 // For small code model we assume that latest object is 16MB before end of 31
4369 // bits boundary. We may also accept pretty large negative constants knowing
4370 // that all objects are in the positive half of address space.
4371 if (M == CodeModel::Small && Offset < 16*1024*1024)
4372 return true;
4373
4374 // For kernel code model we know that all object resist in the negative half
4375 // of 32bits address space. We may not accept negative offsets, since they may
4376 // be just off and we may accept pretty large positive ones.
4377 if (M == CodeModel::Kernel && Offset >= 0)
4378 return true;
4379
4380 return false;
4381}
4382
4383/// Determines whether the callee is required to pop its own arguments.
4384/// Callee pop is necessary to support tail calls.
4385bool X86::isCalleePop(CallingConv::ID CallingConv,
4386 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4387 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4388 // can guarantee TCO.
4389 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4390 return true;
4391
4392 switch (CallingConv) {
4393 default:
4394 return false;
4395 case CallingConv::X86_StdCall:
4396 case CallingConv::X86_FastCall:
4397 case CallingConv::X86_ThisCall:
4398 case CallingConv::X86_VectorCall:
4399 return !is64Bit;
4400 }
4401}
4402
4403/// \brief Return true if the condition is an unsigned comparison operation.
4404static bool isX86CCUnsigned(unsigned X86CC) {
4405 switch (X86CC) {
4406 default:
4407 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4407)
;
4408 case X86::COND_E:
4409 case X86::COND_NE:
4410 case X86::COND_B:
4411 case X86::COND_A:
4412 case X86::COND_BE:
4413 case X86::COND_AE:
4414 return true;
4415 case X86::COND_G:
4416 case X86::COND_GE:
4417 case X86::COND_L:
4418 case X86::COND_LE:
4419 return false;
4420 }
4421}
4422
4423static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4424 switch (SetCCOpcode) {
4425 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4425)
;
4426 case ISD::SETEQ: return X86::COND_E;
4427 case ISD::SETGT: return X86::COND_G;
4428 case ISD::SETGE: return X86::COND_GE;
4429 case ISD::SETLT: return X86::COND_L;
4430 case ISD::SETLE: return X86::COND_LE;
4431 case ISD::SETNE: return X86::COND_NE;
4432 case ISD::SETULT: return X86::COND_B;
4433 case ISD::SETUGT: return X86::COND_A;
4434 case ISD::SETULE: return X86::COND_BE;
4435 case ISD::SETUGE: return X86::COND_AE;
4436 }
4437}
4438
4439/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4440/// condition code, returning the condition code and the LHS/RHS of the
4441/// comparison to make.
4442static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4443 bool isFP, SDValue &LHS, SDValue &RHS,
4444 SelectionDAG &DAG) {
4445 if (!isFP) {
4446 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4447 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4448 // X > -1 -> X == 0, jump !sign.
4449 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4450 return X86::COND_NS;
4451 }
4452 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4453 // X < 0 -> X == 0, jump on sign.
4454 return X86::COND_S;
4455 }
4456 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4457 // X < 1 -> X <= 0
4458 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4459 return X86::COND_LE;
4460 }
4461 }
4462
4463 return TranslateIntegerX86CC(SetCCOpcode);
4464 }
4465
4466 // First determine if it is required or is profitable to flip the operands.
4467
4468 // If LHS is a foldable load, but RHS is not, flip the condition.
4469 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4470 !ISD::isNON_EXTLoad(RHS.getNode())) {
4471 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4472 std::swap(LHS, RHS);
4473 }
4474
4475 switch (SetCCOpcode) {
4476 default: break;
4477 case ISD::SETOLT:
4478 case ISD::SETOLE:
4479 case ISD::SETUGT:
4480 case ISD::SETUGE:
4481 std::swap(LHS, RHS);
4482 break;
4483 }
4484
4485 // On a floating point condition, the flags are set as follows:
4486 // ZF PF CF op
4487 // 0 | 0 | 0 | X > Y
4488 // 0 | 0 | 1 | X < Y
4489 // 1 | 0 | 0 | X == Y
4490 // 1 | 1 | 1 | unordered
4491 switch (SetCCOpcode) {
4492 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4492)
;
4493 case ISD::SETUEQ:
4494 case ISD::SETEQ: return X86::COND_E;
4495 case ISD::SETOLT: // flipped
4496 case ISD::SETOGT:
4497 case ISD::SETGT: return X86::COND_A;
4498 case ISD::SETOLE: // flipped
4499 case ISD::SETOGE:
4500 case ISD::SETGE: return X86::COND_AE;
4501 case ISD::SETUGT: // flipped
4502 case ISD::SETULT:
4503 case ISD::SETLT: return X86::COND_B;
4504 case ISD::SETUGE: // flipped
4505 case ISD::SETULE:
4506 case ISD::SETLE: return X86::COND_BE;
4507 case ISD::SETONE:
4508 case ISD::SETNE: return X86::COND_NE;
4509 case ISD::SETUO: return X86::COND_P;
4510 case ISD::SETO: return X86::COND_NP;
4511 case ISD::SETOEQ:
4512 case ISD::SETUNE: return X86::COND_INVALID;
4513 }
4514}
4515
4516/// Is there a floating point cmov for the specific X86 condition code?
4517/// Current x86 isa includes the following FP cmov instructions:
4518/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4519static bool hasFPCMov(unsigned X86CC) {
4520 switch (X86CC) {
4521 default:
4522 return false;
4523 case X86::COND_B:
4524 case X86::COND_BE:
4525 case X86::COND_E:
4526 case X86::COND_P:
4527 case X86::COND_A:
4528 case X86::COND_AE:
4529 case X86::COND_NE:
4530 case X86::COND_NP:
4531 return true;
4532 }
4533}
4534
4535
4536bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4537 const CallInst &I,
4538 MachineFunction &MF,
4539 unsigned Intrinsic) const {
4540
4541 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4542 if (!IntrData)
4543 return false;
4544
4545 Info.opc = ISD::INTRINSIC_W_CHAIN;
4546 Info.flags = MachineMemOperand::MONone;
4547 Info.offset = 0;
4548
4549 switch (IntrData->Type) {
4550 case EXPAND_FROM_MEM: {
4551 Info.ptrVal = I.getArgOperand(0);
4552 Info.memVT = MVT::getVT(I.getType());
4553 Info.align = 1;
4554 Info.flags |= MachineMemOperand::MOLoad;
4555 break;
4556 }
4557 case COMPRESS_TO_MEM: {
4558 Info.ptrVal = I.getArgOperand(0);
4559 Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
4560 Info.align = 1;
4561 Info.flags |= MachineMemOperand::MOStore;
4562 break;
4563 }
4564 case TRUNCATE_TO_MEM_VI8:
4565 case TRUNCATE_TO_MEM_VI16:
4566 case TRUNCATE_TO_MEM_VI32: {
4567 Info.ptrVal = I.getArgOperand(0);
4568 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4569 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4570 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4571 ScalarVT = MVT::i8;
4572 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4573 ScalarVT = MVT::i16;
4574 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4575 ScalarVT = MVT::i32;
4576
4577 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4578 Info.align = 1;
4579 Info.flags |= MachineMemOperand::MOStore;
4580 break;
4581 }
4582 default:
4583 return false;
4584 }
4585
4586 return true;
4587}
4588
4589/// Returns true if the target can instruction select the
4590/// specified FP immediate natively. If false, the legalizer will
4591/// materialize the FP immediate as a load from a constant pool.
4592bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4593 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4594 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4595 return true;
4596 }
4597 return false;
4598}
4599
4600bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4601 ISD::LoadExtType ExtTy,
4602 EVT NewVT) const {
4603 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4604 // relocation target a movq or addq instruction: don't let the load shrink.
4605 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4606 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4607 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4608 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4609 return true;
4610}
4611
4612/// \brief Returns true if it is beneficial to convert a load of a constant
4613/// to just the constant itself.
4614bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4615 Type *Ty) const {
4616 assert(Ty->isIntegerTy())(static_cast <bool> (Ty->isIntegerTy()) ? void (0) :
__assert_fail ("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4616, __extension__ __PRETTY_FUNCTION__))
;
4617
4618 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4619 if (BitSize == 0 || BitSize > 64)
4620 return false;
4621 return true;
4622}
4623
4624bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4625 // TODO: It might be a win to ease or lift this restriction, but the generic
4626 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4627 if (VT.isVector() && Subtarget.hasAVX512())
4628 return false;
4629
4630 return true;
4631}
4632
4633bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4634 unsigned Index) const {
4635 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4636 return false;
4637
4638 // Mask vectors support all subregister combinations and operations that
4639 // extract half of vector.
4640 if (ResVT.getVectorElementType() == MVT::i1)
4641 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4642 (Index == ResVT.getVectorNumElements()));
4643
4644 return (Index % ResVT.getVectorNumElements()) == 0;
4645}
4646
4647bool X86TargetLowering::isCheapToSpeculateCttz() const {
4648 // Speculate cttz only if we can directly use TZCNT.
4649 return Subtarget.hasBMI();
4650}
4651
4652bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4653 // Speculate ctlz only if we can directly use LZCNT.
4654 return Subtarget.hasLZCNT();
4655}
4656
4657bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
4658 EVT BitcastVT) const {
4659 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1)
4660 return false;
4661
4662 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
4663}
4664
4665bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
4666 const SelectionDAG &DAG) const {
4667 // Do not merge to float value size (128 bytes) if no implicit
4668 // float attribute is set.
4669 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
4670 Attribute::NoImplicitFloat);
4671
4672 if (NoFloat) {
4673 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
4674 return (MemVT.getSizeInBits() <= MaxIntSize);
4675 }
4676 return true;
4677}
4678
4679bool X86TargetLowering::isCtlzFast() const {
4680 return Subtarget.hasFastLZCNT();
4681}
4682
4683bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4684 const Instruction &AndI) const {
4685 return true;
4686}
4687
4688bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4689 if (!Subtarget.hasBMI())
4690 return false;
4691
4692 // There are only 32-bit and 64-bit forms for 'andn'.
4693 EVT VT = Y.getValueType();
4694 if (VT != MVT::i32 && VT != MVT::i64)
4695 return false;
4696
4697 return true;
4698}
4699
4700MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
4701 MVT VT = MVT::getIntegerVT(NumBits);
4702 if (isTypeLegal(VT))
4703 return VT;
4704
4705 // PMOVMSKB can handle this.
4706 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
4707 return MVT::v16i8;
4708
4709 // VPMOVMSKB can handle this.
4710 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
4711 return MVT::v32i8;
4712
4713 // TODO: Allow 64-bit type for 32-bit target.
4714 // TODO: 512-bit types should be allowed, but make sure that those
4715 // cases are handled in combineVectorSizedSetCCEquality().
4716
4717 return MVT::INVALID_SIMPLE_VALUE_TYPE;
4718}
4719
4720/// Val is the undef sentinel value or equal to the specified value.
4721static bool isUndefOrEqual(int Val, int CmpVal) {
4722 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
4723}
4724
4725/// Val is either the undef or zero sentinel value.
4726static bool isUndefOrZero(int Val) {
4727 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
4728}
4729
4730/// Return true if every element in Mask, beginning
4731/// from position Pos and ending in Pos+Size is the undef sentinel value.
4732static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
4733 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4734 if (Mask[i] != SM_SentinelUndef)
4735 return false;
4736 return true;
4737}
4738
4739/// Return true if Val is undef or if its value falls within the
4740/// specified range (L, H].
4741static bool isUndefOrInRange(int Val, int Low, int Hi) {
4742 return (Val == SM_SentinelUndef) || (Val >= Low && Val < Hi);
4743}
4744
4745/// Return true if every element in Mask is undef or if its value
4746/// falls within the specified range (L, H].
4747static bool isUndefOrInRange(ArrayRef<int> Mask,
4748 int Low, int Hi) {
4749 for (int M : Mask)
4750 if (!isUndefOrInRange(M, Low, Hi))
4751 return false;
4752 return true;
4753}
4754
4755/// Return true if Val is undef, zero or if its value falls within the
4756/// specified range (L, H].
4757static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
4758 return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
4759}
4760
4761/// Return true if every element in Mask is undef, zero or if its value
4762/// falls within the specified range (L, H].
4763static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
4764 for (int M : Mask)
4765 if (!isUndefOrZeroOrInRange(M, Low, Hi))
4766 return false;
4767 return true;
4768}
4769
4770/// Return true if every element in Mask, beginning
4771/// from position Pos and ending in Pos+Size, falls within the specified
4772/// sequential range (Low, Low+Size]. or is undef.
4773static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
4774 unsigned Pos, unsigned Size, int Low) {
4775 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
4776 if (!isUndefOrEqual(Mask[i], Low))
4777 return false;
4778 return true;
4779}
4780
4781/// Return true if every element in Mask, beginning
4782/// from position Pos and ending in Pos+Size, falls within the specified
4783/// sequential range (Low, Low+Size], or is undef or is zero.
4784static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4785 unsigned Size, int Low) {
4786 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
4787 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
4788 return false;
4789 return true;
4790}
4791
4792/// Return true if every element in Mask, beginning
4793/// from position Pos and ending in Pos+Size is undef or is zero.
4794static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4795 unsigned Size) {
4796 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4797 if (!isUndefOrZero(Mask[i]))
4798 return false;
4799 return true;
4800}
4801
4802/// \brief Helper function to test whether a shuffle mask could be
4803/// simplified by widening the elements being shuffled.
4804///
4805/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
4806/// leaves it in an unspecified state.
4807///
4808/// NOTE: This must handle normal vector shuffle masks and *target* vector
4809/// shuffle masks. The latter have the special property of a '-2' representing
4810/// a zero-ed lane of a vector.
4811static bool canWidenShuffleElements(ArrayRef<int> Mask,
4812 SmallVectorImpl<int> &WidenedMask) {
4813 WidenedMask.assign(Mask.size() / 2, 0);
4814 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
4815 int M0 = Mask[i];
4816 int M1 = Mask[i + 1];
4817
4818 // If both elements are undef, its trivial.
4819 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
4820 WidenedMask[i / 2] = SM_SentinelUndef;
4821 continue;
4822 }
4823
4824 // Check for an undef mask and a mask value properly aligned to fit with
4825 // a pair of values. If we find such a case, use the non-undef mask's value.
4826 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
4827 WidenedMask[i / 2] = M1 / 2;
4828 continue;
4829 }
4830 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
4831 WidenedMask[i / 2] = M0 / 2;
4832 continue;
4833 }
4834
4835 // When zeroing, we need to spread the zeroing across both lanes to widen.
4836 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
4837 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
4838 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
4839 WidenedMask[i / 2] = SM_SentinelZero;
4840 continue;
4841 }
4842 return false;
4843 }
4844
4845 // Finally check if the two mask values are adjacent and aligned with
4846 // a pair.
4847 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
4848 WidenedMask[i / 2] = M0 / 2;
4849 continue;
4850 }
4851
4852 // Otherwise we can't safely widen the elements used in this shuffle.
4853 return false;
4854 }
4855 assert(WidenedMask.size() == Mask.size() / 2 &&(static_cast <bool> (WidenedMask.size() == Mask.size() /
2 && "Incorrect size of mask after widening the elements!"
) ? void (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4856, __extension__ __PRETTY_FUNCTION__))
4856 "Incorrect size of mask after widening the elements!")(static_cast <bool> (WidenedMask.size() == Mask.size() /
2 && "Incorrect size of mask after widening the elements!"
) ? void (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4856, __extension__ __PRETTY_FUNCTION__))
;
4857
4858 return true;
4859}
4860
4861/// Returns true if Elt is a constant zero or a floating point constant +0.0.
4862bool X86::isZeroNode(SDValue Elt) {
4863 return isNullConstant(Elt) || isNullFPConstant(Elt);
4864}
4865
4866// Build a vector of constants.
4867// Use an UNDEF node if MaskElt == -1.
4868// Split 64-bit constants in the 32-bit mode.
4869static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
4870 const SDLoc &dl, bool IsMask = false) {
4871
4872 SmallVector<SDValue, 32> Ops;
4873 bool Split = false;
4874
4875 MVT ConstVecVT = VT;
4876 unsigned NumElts = VT.getVectorNumElements();
4877 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4878 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4879 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4880 Split = true;
4881 }
4882
4883 MVT EltVT = ConstVecVT.getVectorElementType();
4884 for (unsigned i = 0; i < NumElts; ++i) {
4885 bool IsUndef = Values[i] < 0 && IsMask;
4886 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
4887 DAG.getConstant(Values[i], dl, EltVT);
4888 Ops.push_back(OpNode);
4889 if (Split)
4890 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
4891 DAG.getConstant(0, dl, EltVT));
4892 }
4893 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4894 if (Split)
4895 ConstsNode = DAG.getBitcast(VT, ConstsNode);
4896 return ConstsNode;
4897}
4898
4899static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
4900 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4901 assert(Bits.size() == Undefs.getBitWidth() &&(static_cast <bool> (Bits.size() == Undefs.getBitWidth(
) && "Unequal constant and undef arrays") ? void (0) :
__assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4902, __extension__ __PRETTY_FUNCTION__))
4902 "Unequal constant and undef arrays")(static_cast <bool> (Bits.size() == Undefs.getBitWidth(
) && "Unequal constant and undef arrays") ? void (0) :
__assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4902, __extension__ __PRETTY_FUNCTION__))
;
4903 SmallVector<SDValue, 32> Ops;
4904 bool Split = false;
4905
4906 MVT ConstVecVT = VT;
4907 unsigned NumElts = VT.getVectorNumElements();
4908 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4909 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4910 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4911 Split = true;
4912 }
4913
4914 MVT EltVT = ConstVecVT.getVectorElementType();
4915 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
4916 if (Undefs[i]) {
4917 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
4918 continue;
4919 }
4920 const APInt &V = Bits[i];
4921 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")(static_cast <bool> (V.getBitWidth() == VT.getScalarSizeInBits
() && "Unexpected sizes") ? void (0) : __assert_fail (
"V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4921, __extension__ __PRETTY_FUNCTION__))
;
4922 if (Split) {
4923 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
4924 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
4925 } else if (EltVT == MVT::f32) {
4926 APFloat FV(APFloat::IEEEsingle(), V);
4927 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4928 } else if (EltVT == MVT::f64) {
4929 APFloat FV(APFloat::IEEEdouble(), V);
4930 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4931 } else {
4932 Ops.push_back(DAG.getConstant(V, dl, EltVT));
4933 }
4934 }
4935
4936 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4937 return DAG.getBitcast(VT, ConstsNode);
4938}
4939
4940/// Returns a vector of specified type with all zero elements.
4941static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
4942 SelectionDAG &DAG, const SDLoc &dl) {
4943 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector() || VT.getVectorElementType() == MVT
::i1) && "Unexpected vector type") ? void (0) : __assert_fail
("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4945, __extension__ __PRETTY_FUNCTION__))
4944 VT.getVectorElementType() == MVT::i1) &&(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector() || VT.getVectorElementType() == MVT
::i1) && "Unexpected vector type") ? void (0) : __assert_fail
("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4945, __extension__ __PRETTY_FUNCTION__))
4945 "Unexpected vector type")(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector() || VT.getVectorElementType() == MVT
::i1) && "Unexpected vector type") ? void (0) : __assert_fail
("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4945, __extension__ __PRETTY_FUNCTION__))
;
4946
4947 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
4948 // type. This ensures they get CSE'd. But if the integer type is not
4949 // available, use a floating-point +0.0 instead.
4950 SDValue Vec;
4951 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
4952 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
4953 } else if (VT.getVectorElementType() == MVT::i1) {
4954 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(static_cast <bool> ((Subtarget.hasBWI() || VT.getVectorNumElements
() <= 16) && "Unexpected vector type") ? void (0) :
__assert_fail ("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4955, __extension__ __PRETTY_FUNCTION__))
4955 "Unexpected vector type")(static_cast <bool> ((Subtarget.hasBWI() || VT.getVectorNumElements
() <= 16) && "Unexpected vector type") ? void (0) :
__assert_fail ("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4955, __extension__ __PRETTY_FUNCTION__))
;
4956 Vec = DAG.getConstant(0, dl, VT);
4957 } else {
4958 unsigned Num32BitElts = VT.getSizeInBits() / 32;
4959 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
4960 }
4961 return DAG.getBitcast(VT, Vec);
4962}
4963
4964static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
4965 const SDLoc &dl, unsigned vectorWidth) {
4966 EVT VT = Vec.getValueType();
4967 EVT ElVT = VT.getVectorElementType();
4968 unsigned Factor = VT.getSizeInBits()/vectorWidth;
4969 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
4970 VT.getVectorNumElements()/Factor);
4971
4972 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
4973 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
4974 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")(static_cast <bool> (isPowerOf2_32(ElemsPerChunk) &&
"Elements per chunk not power of 2") ? void (0) : __assert_fail
("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4974, __extension__ __PRETTY_FUNCTION__))
;
4975
4976 // This is the index of the first element of the vectorWidth-bit chunk
4977 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
4978 IdxVal &= ~(ElemsPerChunk - 1);
4979
4980 // If the input is a buildvector just emit a smaller one.
4981 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
4982 return DAG.getBuildVector(ResultVT, dl,
4983 Vec->ops().slice(IdxVal, ElemsPerChunk));
4984
4985 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
4986 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
4987}
4988
4989/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
4990/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
4991/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
4992/// instructions or a simple subregister reference. Idx is an index in the
4993/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
4994/// lowering EXTRACT_VECTOR_ELT operations easier.
4995static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
4996 SelectionDAG &DAG, const SDLoc &dl) {
4997 assert((Vec.getValueType().is256BitVector() ||(static_cast <bool> ((Vec.getValueType().is256BitVector
() || Vec.getValueType().is512BitVector()) && "Unexpected vector size!"
) ? void (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4998, __extension__ __PRETTY_FUNCTION__))
4998 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(static_cast <bool> ((Vec.getValueType().is256BitVector
() || Vec.getValueType().is512BitVector()) && "Unexpected vector size!"
) ? void (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 4998, __extension__ __PRETTY_FUNCTION__))
;
4999 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5000}
5001
5002/// Generate a DAG to grab 256-bits from a 512-bit vector.
5003static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5004 SelectionDAG &DAG, const SDLoc &dl) {
5005 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")(static_cast <bool> (Vec.getValueType().is512BitVector(
) && "Unexpected vector size!") ? void (0) : __assert_fail
("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5005, __extension__ __PRETTY_FUNCTION__))
;
5006 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5007}
5008
5009static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5010 SelectionDAG &DAG, const SDLoc &dl,
5011 unsigned vectorWidth) {
5012 assert((vectorWidth == 128 || vectorWidth == 256) &&(static_cast <bool> ((vectorWidth == 128 || vectorWidth
== 256) && "Unsupported vector width") ? void (0) : __assert_fail
("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5013, __extension__ __PRETTY_FUNCTION__))
5013 "Unsupported vector width")(static_cast <bool> ((vectorWidth == 128 || vectorWidth
== 256) && "Unsupported vector width") ? void (0) : __assert_fail
("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5013, __extension__ __PRETTY_FUNCTION__))
;
5014 // Inserting UNDEF is Result
5015 if (Vec.isUndef())
5016 return Result;
5017 EVT VT = Vec.getValueType();
5018 EVT ElVT = VT.getVectorElementType();
5019 EVT ResultVT = Result.getValueType();
5020
5021 // Insert the relevant vectorWidth bits.
5022 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5023 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")(static_cast <bool> (isPowerOf2_32(ElemsPerChunk) &&
"Elements per chunk not power of 2") ? void (0) : __assert_fail
("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5023, __extension__ __PRETTY_FUNCTION__))
;
5024
5025 // This is the index of the first element of the vectorWidth-bit chunk
5026 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5027 IdxVal &= ~(ElemsPerChunk - 1);
5028
5029 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5030 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5031}
5032
5033/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5034/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5035/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5036/// simple superregister reference. Idx is an index in the 128 bits
5037/// we want. It need not be aligned to a 128-bit boundary. That makes
5038/// lowering INSERT_VECTOR_ELT operations easier.
5039static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5040 SelectionDAG &DAG, const SDLoc &dl) {
5041 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")(static_cast <bool> (Vec.getValueType().is128BitVector(
) && "Unexpected vector size!") ? void (0) : __assert_fail
("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5041, __extension__ __PRETTY_FUNCTION__))
;
5042 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5043}
5044
5045static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5046 SelectionDAG &DAG, const SDLoc &dl) {
5047 assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!")(static_cast <bool> (Vec.getValueType().is256BitVector(
) && "Unexpected vector size!") ? void (0) : __assert_fail
("Vec.getValueType().is256BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5047, __extension__ __PRETTY_FUNCTION__))
;
5048 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
5049}
5050
5051// Helper for splitting operands of a binary operation to legal target size and
5052// apply a function on each part.
5053// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5054// 256-bit and on AVX512BW in 512-bit.
5055// The argument VT is the type used for deciding if/how to split the operands
5056// Op0 and Op1. Op0 and Op1 do *not* have to be of type VT.
5057// The argument Builder is a function that will be applied on each split psrt:
5058// SDValue Builder(SelectionDAG&G, SDLoc, SDValue, SDValue)
5059template <typename F>
5060SDValue SplitBinaryOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5061 const SDLoc &DL, EVT VT, SDValue Op0,
5062 SDValue Op1, F Builder) {
5063 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2")(static_cast <bool> (Subtarget.hasSSE2() && "Target assumed to support at least SSE2"
) ? void (0) : __assert_fail ("Subtarget.hasSSE2() && \"Target assumed to support at least SSE2\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5063, __extension__ __PRETTY_FUNCTION__))
;
5064 unsigned NumSubs = 1;
5065 if (Subtarget.useBWIRegs()) {
5066 if (VT.getSizeInBits() > 512) {
5067 NumSubs = VT.getSizeInBits() / 512;
5068 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size")(static_cast <bool> ((VT.getSizeInBits() % 512) == 0 &&
"Illegal vector size") ? void (0) : __assert_fail ("(VT.getSizeInBits() % 512) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5068, __extension__ __PRETTY_FUNCTION__))
;
5069 }
5070 } else if (Subtarget.hasAVX2()) {
5071 if (VT.getSizeInBits() > 256) {
5072 NumSubs = VT.getSizeInBits() / 256;
5073 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size")(static_cast <bool> ((VT.getSizeInBits() % 256) == 0 &&
"Illegal vector size") ? void (0) : __assert_fail ("(VT.getSizeInBits() % 256) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5073, __extension__ __PRETTY_FUNCTION__))
;
5074 }
5075 } else {
5076 if (VT.getSizeInBits() > 128) {
5077 NumSubs = VT.getSizeInBits() / 128;
5078 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size")(static_cast <bool> ((VT.getSizeInBits() % 128) == 0 &&
"Illegal vector size") ? void (0) : __assert_fail ("(VT.getSizeInBits() % 128) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5078, __extension__ __PRETTY_FUNCTION__))
;
5079 }
5080 }
5081
5082 if (NumSubs == 1)
5083 return Builder(DAG, DL, Op0, Op1);
5084
5085 SmallVector<SDValue, 4> Subs;
5086 EVT InVT = Op0.getValueType();
5087 EVT SubVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
5088 InVT.getVectorNumElements() / NumSubs);
5089 for (unsigned i = 0; i != NumSubs; ++i) {
5090 unsigned Idx = i * SubVT.getVectorNumElements();
5091 SDValue LHS = extractSubVector(Op0, Idx, DAG, DL, SubVT.getSizeInBits());
5092 SDValue RHS = extractSubVector(Op1, Idx, DAG, DL, SubVT.getSizeInBits());
5093 Subs.push_back(Builder(DAG, DL, LHS, RHS));
5094 }
5095 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5096}
5097
5098// Return true if the instruction zeroes the unused upper part of the
5099// destination and accepts mask.
5100static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
5101 switch (Opcode) {
5102 default:
5103 return false;
5104 case X86ISD::CMPM:
5105 case X86ISD::CMPMU:
5106 case X86ISD::CMPM_RND:
5107 return true;
5108 }
5109}
5110
5111/// Insert i1-subvector to i1-vector.
5112static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5113 const X86Subtarget &Subtarget) {
5114
5115 SDLoc dl(Op);
5116 SDValue Vec = Op.getOperand(0);
5117 SDValue SubVec = Op.getOperand(1);
5118 SDValue Idx = Op.getOperand(2);
5119
5120 if (!isa<ConstantSDNode>(Idx))
5121 return SDValue();
5122
5123 // Inserting undef is a nop. We can just return the original vector.
5124 if (SubVec.isUndef())
5125 return Vec;
5126
5127 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5128 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5129 return Op;
5130
5131 MVT OpVT = Op.getSimpleValueType();
5132 unsigned NumElems = OpVT.getVectorNumElements();
5133
5134 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5135
5136 // Extend to natively supported kshift.
5137 MVT WideOpVT = OpVT;
5138 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5139 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5140
5141 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5142 // if necessary.
5143 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5144 // May need to promote to a legal type.
5145 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5146 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5147 SubVec, Idx);
5148 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5149 }
5150
5151 MVT SubVecVT = SubVec.getSimpleValueType();
5152 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5153
5154 assert(IdxVal + SubVecNumElems <= NumElems &&(static_cast <bool> (IdxVal + SubVecNumElems <= NumElems
&& IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR") ? void (0) : __assert_fail
("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5156, __extension__ __PRETTY_FUNCTION__))
5155 IdxVal % SubVecVT.getSizeInBits() == 0 &&(static_cast <bool> (IdxVal + SubVecNumElems <= NumElems
&& IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR") ? void (0) : __assert_fail
("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5156, __extension__ __PRETTY_FUNCTION__))
5156 "Unexpected index value in INSERT_SUBVECTOR")(static_cast <bool> (IdxVal + SubVecNumElems <= NumElems
&& IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR") ? void (0) : __assert_fail
("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5156, __extension__ __PRETTY_FUNCTION__))
;
5157
5158 SDValue Undef = DAG.getUNDEF(WideOpVT);
5159
5160 if (IdxVal == 0) {
5161 // Zero lower bits of the Vec
5162 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5163 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
5164 ZeroIdx);
5165 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5166 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5167 // Merge them together, SubVec should be zero extended.
5168 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5169 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5170 SubVec, ZeroIdx);
5171 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5172 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5173 }
5174
5175 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5176 Undef, SubVec, ZeroIdx);
5177
5178 if (Vec.isUndef()) {
5179 assert(IdxVal != 0 && "Unexpected index")(static_cast <bool> (IdxVal != 0 && "Unexpected index"
) ? void (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5179, __extension__ __PRETTY_FUNCTION__))
;
5180 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5181 DAG.getConstant(IdxVal, dl, MVT::i8));
5182 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5183 }
5184
5185 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5186 assert(IdxVal != 0 && "Unexpected index")(static_cast <bool> (IdxVal != 0 && "Unexpected index"
) ? void (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5186, __extension__ __PRETTY_FUNCTION__))
;
5187 NumElems = WideOpVT.getVectorNumElements();
5188 unsigned ShiftLeft = NumElems - SubVecNumElems;
5189 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5190 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5191 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5192 if (ShiftRight != 0)
5193 SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
5194 DAG.getConstant(ShiftRight, dl, MVT::i8));
5195 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5196 }
5197
5198 // Simple case when we put subvector in the upper part
5199 if (IdxVal + SubVecNumElems == NumElems) {
5200 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5201 DAG.getConstant(IdxVal, dl, MVT::i8));
5202 if (SubVecNumElems * 2 == NumElems) {
5203 // Special case, use legal zero extending insert_subvector. This allows
5204 // isel to opimitize when bits are known zero.
5205 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
5206 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5207 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5208 Vec, ZeroIdx);
5209 } else {
5210 // Otherwise use explicit shifts to zero the bits.
5211 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5212 Undef, Vec, ZeroIdx);
5213 NumElems = WideOpVT.getVectorNumElements();
5214 SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
5215 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5216 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5217 }
5218 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5219 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5220 }
5221
5222 // Inserting into the middle is more complicated.
5223
5224 NumElems = WideOpVT.getVectorNumElements();
5225
5226 // Widen the vector if needed.
5227 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5228 // Move the current value of the bit to be replace to the lsbs.
5229 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5230 DAG.getConstant(IdxVal, dl, MVT::i8));
5231 // Xor with the new bit.
5232 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
5233 // Shift to MSB, filling bottom bits with 0.
5234 unsigned ShiftLeft = NumElems - SubVecNumElems;
5235 Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
5236 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5237 // Shift to the final position, filling upper bits with 0.
5238 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5239 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
5240 DAG.getConstant(ShiftRight, dl, MVT::i8));
5241 // Xor with original vector leaving the new value.
5242 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
5243 // Reduce to original width if needed.
5244 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5245}
5246
5247/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
5248/// instructions. This is used because creating CONCAT_VECTOR nodes of
5249/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
5250/// large BUILD_VECTORS.
5251static SDValue concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
5252 unsigned NumElems, SelectionDAG &DAG,
5253 const SDLoc &dl) {
5254 SDValue V = insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5255 return insert128BitVector(V, V2, NumElems / 2, DAG, dl);
5256}
5257
5258static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
5259 unsigned NumElems, SelectionDAG &DAG,
5260 const SDLoc &dl) {
5261 SDValue V = insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5262 return insert256BitVector(V, V2, NumElems / 2, DAG, dl);
5263}
5264
5265static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT,
5266 unsigned NumElems, SelectionDAG &DAG,
5267 const SDLoc &dl, unsigned VectorWidth) {
5268 SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth);
5269 return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth);
5270}
5271
5272/// Returns a vector of specified type with all bits set.
5273/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5274/// Then bitcast to their original type, ensuring they get CSE'd.
5275static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5276 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector()) && "Expected a 128/256/512-bit vector type"
) ? void (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5277, __extension__ __PRETTY_FUNCTION__))
5277 "Expected a 128/256/512-bit vector type")(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector()) && "Expected a 128/256/512-bit vector type"
) ? void (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5277, __extension__ __PRETTY_FUNCTION__))
;
5278
5279 APInt Ones = APInt::getAllOnesValue(32);
5280 unsigned NumElts = VT.getSizeInBits() / 32;
5281 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5282 return DAG.getBitcast(VT, Vec);
5283}
5284
5285static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
5286 SelectionDAG &DAG) {
5287 EVT InVT = In.getValueType();
5288 assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode")(static_cast <bool> ((X86ISD::VSEXT == Opc || X86ISD::VZEXT
== Opc) && "Unexpected opcode") ? void (0) : __assert_fail
("(X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && \"Unexpected opcode\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5288, __extension__ __PRETTY_FUNCTION__))
;
5289
5290 if (VT.is128BitVector() && InVT.is128BitVector())
5291 return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
5292 : DAG.getZeroExtendVectorInReg(In, DL, VT);
5293
5294 // For 256-bit vectors, we only need the lower (128-bit) input half.
5295 // For 512-bit vectors, we only need the lower input half or quarter.
5296 if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
5297 int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5298 In = extractSubVector(In, 0, DAG, DL,
5299 std::max(128, (int)VT.getSizeInBits() / Scale));
5300 }
5301
5302 return DAG.getNode(Opc, DL, VT, In);
5303}
5304
5305/// Returns a vector_shuffle node for an unpackl operation.
5306static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5307 SDValue V1, SDValue V2) {
5308 SmallVector<int, 8> Mask;
5309 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5310 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5311}
5312
5313/// Returns a vector_shuffle node for an unpackh operation.
5314static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5315 SDValue V1, SDValue V2) {
5316 SmallVector<int, 8> Mask;
5317 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5318 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5319}
5320
5321/// Return a vector_shuffle of the specified vector of zero or undef vector.
5322/// This produces a shuffle where the low element of V2 is swizzled into the
5323/// zero/undef vector, landing at element Idx.
5324/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
5325static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5326 bool IsZero,
5327 const X86Subtarget &Subtarget,
5328 SelectionDAG &DAG) {
5329 MVT VT = V2.getSimpleValueType();
5330 SDValue V1 = IsZero
5331 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5332 int NumElems = VT.getVectorNumElements();
5333 SmallVector<int, 16> MaskVec(NumElems);
5334 for (int i = 0; i != NumElems; ++i)
5335 // If this is the insertion idx, put the low elt of V2 here.
5336 MaskVec[i] = (i == Idx) ? NumElems : i;
5337 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5338}
5339
5340static SDValue peekThroughBitcasts(SDValue V) {
5341 while (V.getNode() && V.getOpcode() == ISD::BITCAST)
5342 V = V.getOperand(0);
5343 return V;
5344}
5345
5346static SDValue peekThroughOneUseBitcasts(SDValue V) {
5347 while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
5348 V.getOperand(0).hasOneUse())
5349 V = V.getOperand(0);
5350 return V;
5351}
5352
5353static const Constant *getTargetConstantFromNode(SDValue Op) {
5354 Op = peekThroughBitcasts(Op);
5355
5356 auto *Load = dyn_cast<LoadSDNode>(Op);
5357 if (!Load)
5358 return nullptr;
5359
5360 SDValue Ptr = Load->getBasePtr();
5361 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5362 Ptr->getOpcode() == X86ISD::WrapperRIP)
5363 Ptr = Ptr->getOperand(0);
5364
5365 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5366 if (!CNode || CNode->isMachineConstantPoolEntry())
5367 return nullptr;
5368
5369 return dyn_cast<Constant>(CNode->getConstVal());
5370}
5371
5372// Extract raw constant bits from constant pools.
5373static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5374 APInt &UndefElts,
5375 SmallVectorImpl<APInt> &EltBits,
5376 bool AllowWholeUndefs = true,
5377 bool AllowPartialUndefs = true) {
5378 assert(EltBits.empty() && "Expected an empty EltBits vector")(static_cast <bool> (EltBits.empty() && "Expected an empty EltBits vector"
) ? void (0) : __assert_fail ("EltBits.empty() && \"Expected an empty EltBits vector\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5378, __extension__ __PRETTY_FUNCTION__))
;
5379
5380 Op = peekThroughBitcasts(Op);
5381
5382 EVT VT = Op.getValueType();
5383 unsigned SizeInBits = VT.getSizeInBits();
5384 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!")(static_cast <bool> ((SizeInBits % EltSizeInBits) == 0 &&
"Can't split constant!") ? void (0) : __assert_fail ("(SizeInBits % EltSizeInBits) == 0 && \"Can't split constant!\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5384, __extension__ __PRETTY_FUNCTION__))
;
5385 unsigned NumElts = SizeInBits / EltSizeInBits;
5386
5387 // Bitcast a source array of element bits to the target size.
5388 auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
5389 unsigned NumSrcElts = UndefSrcElts.getBitWidth();
5390 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
5391 assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&(static_cast <bool> ((NumSrcElts * SrcEltSizeInBits) ==
SizeInBits && "Constant bit sizes don't match") ? void
(0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5392, __extension__ __PRETTY_FUNCTION__))
5392 "Constant bit sizes don't match")(static_cast <bool> ((NumSrcElts * SrcEltSizeInBits) ==
SizeInBits && "Constant bit sizes don't match") ? void
(0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5392, __extension__ __PRETTY_FUNCTION__))
;
5393
5394 // Don't split if we don't allow undef bits.
5395 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5396 if (UndefSrcElts.getBoolValue() && !AllowUndefs)
5397 return false;
5398
5399 // If we're already the right size, don't bother bitcasting.
5400 if (NumSrcElts == NumElts) {
5401 UndefElts = UndefSrcElts;
5402 EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
5403 return true;
5404 }
5405
5406 // Extract all the undef/constant element data and pack into single bitsets.
5407 APInt UndefBits(SizeInBits, 0);
5408 APInt MaskBits(SizeInBits, 0);
5409
5410 for (unsigned i = 0; i != NumSrcElts; ++i) {
5411 unsigned BitOffset = i * SrcEltSizeInBits;
5412 if (UndefSrcElts[i])
5413 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5414 MaskBits.insertBits(SrcEltBits[i], BitOffset);
5415 }
5416
5417 // Split the undef/constant single bitset data into the target elements.
5418 UndefElts = APInt(NumElts, 0);
5419 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5420
5421 for (unsigned i = 0; i != NumElts; ++i) {
5422 unsigned BitOffset = i * EltSizeInBits;
5423 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5424
5425 // Only treat an element as UNDEF if all bits are UNDEF.
5426 if (UndefEltBits.isAllOnesValue()) {
5427 if (!AllowWholeUndefs)
5428 return false;
5429 UndefElts.setBit(i);
5430 continue;
5431 }
5432
5433 // If only some bits are UNDEF then treat them as zero (or bail if not
5434 // supported).
5435 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5436 return false;
5437
5438 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5439 EltBits[i] = Bits.getZExtValue();
5440 }
5441 return true;
5442 };
5443
5444 // Collect constant bits and insert into mask/undef bit masks.
5445 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5446 unsigned UndefBitIndex) {
5447 if (!Cst)
5448 return false;
5449 if (isa<UndefValue>(Cst)) {
5450 Undefs.setBit(UndefBitIndex);
5451 return true;
5452 }
5453 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5454 Mask = CInt->getValue();
5455 return true;
5456 }
5457 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5458 Mask = CFP->getValueAPF().bitcastToAPInt();
5459 return true;
5460 }
5461 return false;
5462 };
5463
5464 // Handle UNDEFs.
5465 if (Op.isUndef()) {
5466 APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
5467 SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
5468 return CastBitData(UndefSrcElts, SrcEltBits);
5469 }
5470
5471 // Extract scalar constant bits.
5472 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
5473 APInt UndefSrcElts = APInt::getNullValue(1);
5474 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
5475 return CastBitData(UndefSrcElts, SrcEltBits);
5476 }
5477 if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
5478 APInt UndefSrcElts = APInt::getNullValue(1);
5479 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5480 SmallVector<APInt, 64> SrcEltBits(1, RawBits);
5481 return CastBitData(UndefSrcElts, SrcEltBits);
5482 }
5483
5484 // Extract constant bits from build vector.
5485 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5486 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5487 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5488
5489 APInt UndefSrcElts(NumSrcElts, 0);
5490 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5491 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5492 const SDValue &Src = Op.getOperand(i);
5493 if (Src.isUndef()) {
5494 UndefSrcElts.setBit(i);
5495 continue;
5496 }
5497 auto *Cst = cast<ConstantSDNode>(Src);
5498 SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5499 }
5500 return CastBitData(UndefSrcElts, SrcEltBits);
5501 }
5502
5503 // Extract constant bits from constant pool vector.
5504 if (auto *Cst = getTargetConstantFromNode(Op)) {
5505 Type *CstTy = Cst->getType();
5506 if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
5507 return false;
5508
5509 unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
5510 unsigned NumSrcElts = CstTy->getVectorNumElements();
5511
5512 APInt UndefSrcElts(NumSrcElts, 0);
5513 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5514 for (unsigned i = 0; i != NumSrcElts; ++i)
5515 if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
5516 UndefSrcElts, i))
5517 return false;
5518
5519 return CastBitData(UndefSrcElts, SrcEltBits);
5520 }
5521
5522 // Extract constant bits from a broadcasted constant pool scalar.
5523 if (Op.getOpcode() == X86ISD::VBROADCAST &&
5524 EltSizeInBits <= VT.getScalarSizeInBits()) {
5525 if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5526 unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
5527 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5528
5529 APInt UndefSrcElts(NumSrcElts, 0);
5530 SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
5531 if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
5532 if (UndefSrcElts[0])
5533 UndefSrcElts.setBits(0, NumSrcElts);
5534 SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
5535 return CastBitData(UndefSrcElts, SrcEltBits);
5536 }
5537 }
5538 }
5539
5540 // Extract a rematerialized scalar constant insertion.
5541 if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
5542 Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
5543 isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
5544 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5545 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5546
5547 APInt UndefSrcElts(NumSrcElts, 0);
5548 SmallVector<APInt, 64> SrcEltBits;
5549 auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
5550 SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
5551 SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
5552 return CastBitData(UndefSrcElts, SrcEltBits);
5553 }
5554
5555 return false;
5556}
5557
5558static bool getTargetShuffleMaskIndices(SDValue MaskNode,
5559 unsigned MaskEltSizeInBits,
5560 SmallVectorImpl<uint64_t> &RawMask) {
5561 APInt UndefElts;
5562 SmallVector<APInt, 64> EltBits;
5563
5564 // Extract the raw target constant bits.
5565 // FIXME: We currently don't support UNDEF bits or mask entries.
5566 if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
5567 EltBits, /* AllowWholeUndefs */ false,
5568 /* AllowPartialUndefs */ false))
5569 return false;
5570
5571 // Insert the extracted elements into the mask.
5572 for (APInt Elt : EltBits)
5573 RawMask.push_back(Elt.getZExtValue());
5574
5575 return true;
5576}
5577
5578/// Create a shuffle mask that matches the PACKSS/PACKUS truncation.
5579/// Note: This ignores saturation, so inputs must be checked first.
5580static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
5581 bool Unary) {
5582 assert(Mask.empty() && "Expected an empty shuffle mask vector")(static_cast <bool> (Mask.empty() && "Expected an empty shuffle mask vector"
) ? void (0) : __assert_fail ("Mask.empty() && \"Expected an empty shuffle mask vector\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5582, __extension__ __PRETTY_FUNCTION__))
;
5583 unsigned NumElts = VT.getVectorNumElements();
5584 unsigned NumLanes = VT.getSizeInBits() / 128;
5585 unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits();
5586 unsigned Offset = Unary ? 0 : NumElts;
5587
5588 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
5589 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5590 Mask.push_back(Elt + (Lane * NumEltsPerLane));
5591 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5592 Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset);
5593 }
5594}
5595
5596/// Calculates the shuffle mask corresponding to the target-specific opcode.
5597/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
5598/// operands in \p Ops, and returns true.
5599/// Sets \p IsUnary to true if only one source is used. Note that this will set
5600/// IsUnary for shuffles which use a single input multiple times, and in those
5601/// cases it will adjust the mask to only have indices within that single input.
5602/// It is an error to call this with non-empty Mask/Ops vectors.
5603static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
5604 SmallVectorImpl<SDValue> &Ops,
5605 SmallVectorImpl<int> &Mask, bool &IsUnary) {
5606 unsigned NumElems = VT.getVectorNumElements();
5607 SDValue ImmN;
5608
5609 assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector")(static_cast <bool> (Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"
) ? void (0) : __assert_fail ("Mask.empty() && \"getTargetShuffleMask expects an empty Mask vector\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5609, __extension__ __PRETTY_FUNCTION__))
;
5610 assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector")(static_cast <bool> (Ops.empty() && "getTargetShuffleMask expects an empty Ops vector"
) ? void (0) : __assert_fail ("Ops.empty() && \"getTargetShuffleMask expects an empty Ops vector\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5610, __extension__ __PRETTY_FUNCTION__))
;
5611
5612 IsUnary = false;
5613 bool IsFakeUnary = false;
5614 switch(N->getOpcode()) {
5615 case X86ISD::BLENDI:
5616 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5616, __extension__ __PRETTY_FUNCTION__))
;
5617 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5617, __extension__ __PRETTY_FUNCTION__))
;
5618 ImmN = N->getOperand(N->getNumOperands()-1);
5619 DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5620 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5621 break;
5622 case X86ISD::SHUFP:
5623 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5623, __extension__ __PRETTY_FUNCTION__))
;
5624 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5624, __extension__ __PRETTY_FUNCTION__))
;
5625 ImmN = N->getOperand(N->getNumOperands()-1);
5626 DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5627 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5628 break;
5629 case X86ISD::INSERTPS:
5630 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5630, __extension__ __PRETTY_FUNCTION__))
;
5631 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5631, __extension__ __PRETTY_FUNCTION__))
;
5632 ImmN = N->getOperand(N->getNumOperands()-1);
5633 DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5634 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5635 break;
5636 case X86ISD::EXTRQI:
5637 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5637, __extension__ __PRETTY_FUNCTION__))
;
5638 if (isa<ConstantSDNode>(N->getOperand(1)) &&
5639 isa<ConstantSDNode>(N->getOperand(2))) {
5640 int BitLen = N->getConstantOperandVal(1);
5641 int BitIdx = N->getConstantOperandVal(2);
5642 DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask);
5643 IsUnary = true;
5644 }
5645 break;
5646 case X86ISD::INSERTQI:
5647 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5647, __extension__ __PRETTY_FUNCTION__))
;
5648 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5648, __extension__ __PRETTY_FUNCTION__))
;
5649 if (isa<ConstantSDNode>(N->getOperand(2)) &&
5650 isa<ConstantSDNode>(N->getOperand(3))) {
5651 int BitLen = N->getConstantOperandVal(2);
5652 int BitIdx = N->getConstantOperandVal(3);
5653 DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask);
5654 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5655 }
5656 break;
5657 case X86ISD::UNPCKH:
5658 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5658, __extension__ __PRETTY_FUNCTION__))
;
5659 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5659, __extension__ __PRETTY_FUNCTION__))
;
5660 DecodeUNPCKHMask(VT, Mask);
5661 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5662 break;
5663 case X86ISD::UNPCKL:
5664 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5664, __extension__ __PRETTY_FUNCTION__))
;
5665 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5665, __extension__ __PRETTY_FUNCTION__))
;
5666 DecodeUNPCKLMask(VT, Mask);
5667 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5668 break;
5669 case X86ISD::MOVHLPS:
5670 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5670, __extension__ __PRETTY_FUNCTION__))
;
5671 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5671, __extension__ __PRETTY_FUNCTION__))
;
5672 DecodeMOVHLPSMask(NumElems, Mask);
5673 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5674 break;
5675 case X86ISD::MOVLHPS:
5676 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5676, __extension__ __PRETTY_FUNCTION__))
;
5677 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5677, __extension__ __PRETTY_FUNCTION__))
;
5678 DecodeMOVLHPSMask(NumElems, Mask);
5679 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5680 break;
5681 case X86ISD::PALIGNR:
5682 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5682, __extension__ __PRETTY_FUNCTION__))
;
5683 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5683, __extension__ __PRETTY_FUNCTION__))
;
5684 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5684, __extension__ __PRETTY_FUNCTION__))
;
5685 ImmN = N->getOperand(N->getNumOperands()-1);
5686 DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5687 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5688 Ops.push_back(N->getOperand(1));
5689 Ops.push_back(N->getOperand(0));
5690 break;
5691 case X86ISD::VSHLDQ:
5692 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5692, __extension__ __PRETTY_FUNCTION__))
;
5693 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5693, __extension__ __PRETTY_FUNCTION__))
;
5694 ImmN = N->getOperand(N->getNumOperands() - 1);
5695 DecodePSLLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5696 IsUnary = true;
5697 break;
5698 case X86ISD::VSRLDQ:
5699 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5699, __extension__ __PRETTY_FUNCTION__))
;
5700 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5700, __extension__ __PRETTY_FUNCTION__))
;
5701 ImmN = N->getOperand(N->getNumOperands() - 1);
5702 DecodePSRLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5703 IsUnary = true;
5704 break;
5705 case X86ISD::PSHUFD:
5706 case X86ISD::VPERMILPI:
5707 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5707, __extension__ __PRETTY_FUNCTION__))
;
5708 ImmN = N->getOperand(N->getNumOperands()-1);
5709 DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5710 IsUnary = true;
5711 break;
5712 case X86ISD::PSHUFHW:
5713 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5713, __extension__ __PRETTY_FUNCTION__))
;
5714 ImmN = N->getOperand(N->getNumOperands()-1);
5715 DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5716 IsUnary = true;
5717 break;
5718 case X86ISD::PSHUFLW:
5719 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5719, __extension__ __PRETTY_FUNCTION__))
;
5720 ImmN = N->getOperand(N->getNumOperands()-1);
5721 DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5722 IsUnary = true;
5723 break;
5724 case X86ISD::VZEXT_MOVL:
5725 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5725, __extension__ __PRETTY_FUNCTION__))
;
5726 DecodeZeroMoveLowMask(VT, Mask);
5727 IsUnary = true;
5728 break;
5729 case X86ISD::VBROADCAST: {
5730 SDValue N0 = N->getOperand(0);
5731 // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
5732 // add the pre-extracted value to the Ops vector.
5733 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5734 N0.getOperand(0).getValueType() == VT &&
5735 N0.getConstantOperandVal(1) == 0)
5736 Ops.push_back(N0.getOperand(0));
5737
5738 // We only decode broadcasts of same-sized vectors, unless the broadcast
5739 // came from an extract from the original width. If we found one, we
5740 // pushed it the Ops vector above.
5741 if (N0.getValueType() == VT || !Ops.empty()) {
5742 DecodeVectorBroadcast(VT, Mask);
5743 IsUnary = true;
5744 break;
5745 }
5746 return false;
5747 }
5748 case X86ISD::VPERMILPV: {
5749 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5749, __extension__ __PRETTY_FUNCTION__))
;
5750 IsUnary = true;
5751 SDValue MaskNode = N->getOperand(1);
5752 unsigned MaskEltSize = VT.getScalarSizeInBits();
5753 SmallVector<uint64_t, 32> RawMask;
5754 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5755 DecodeVPERMILPMask(VT, RawMask, Mask);
5756 break;
5757 }
5758 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5759 DecodeVPERMILPMask(C, MaskEltSize, Mask);
5760 break;
5761 }
5762 return false;
5763 }
5764 case X86ISD::PSHUFB: {
5765 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5765, __extension__ __PRETTY_FUNCTION__))
;
5766 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5766, __extension__ __PRETTY_FUNCTION__))
;
5767 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5767, __extension__ __PRETTY_FUNCTION__))
;
5768 IsUnary = true;
5769 SDValue MaskNode = N->getOperand(1);
5770 SmallVector<uint64_t, 32> RawMask;
5771 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5772 DecodePSHUFBMask(RawMask, Mask);
5773 break;
5774 }
5775 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5776 DecodePSHUFBMask(C, Mask);
5777 break;
5778 }
5779 return false;
5780 }
5781 case X86ISD::VPERMI:
5782 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5782, __extension__ __PRETTY_FUNCTION__))
;
5783 ImmN = N->getOperand(N->getNumOperands()-1);
5784 DecodeVPERMMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5785 IsUnary = true;
5786 break;
5787 case X86ISD::MOVSS:
5788 case X86ISD::MOVSD:
5789 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5789, __extension__ __PRETTY_FUNCTION__))
;
5790 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5790, __extension__ __PRETTY_FUNCTION__))
;
5791 DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
5792 break;
5793 case X86ISD::VPERM2X128:
5794 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5794, __extension__ __PRETTY_FUNCTION__))
;
5795 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5795, __extension__ __PRETTY_FUNCTION__))
;
5796 ImmN = N->getOperand(N->getNumOperands()-1);
5797 DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5798 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5799 break;
5800 case X86ISD::MOVSLDUP:
5801 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5801, __extension__ __PRETTY_FUNCTION__))
;
5802 DecodeMOVSLDUPMask(VT, Mask);
5803 IsUnary = true;
5804 break;
5805 case X86ISD::MOVSHDUP:
5806 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5806, __extension__ __PRETTY_FUNCTION__))
;
5807 DecodeMOVSHDUPMask(VT, Mask);
5808 IsUnary = true;
5809 break;
5810 case X86ISD::MOVDDUP:
5811 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/X86/X86ISelLowering.cpp"
, 5811, __extension__ __PRETTY_FUNCTION__))</