Bug Summary

File: include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1142, column 10
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn337490/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86 -I /build/llvm-toolchain-snapshot-7~svn337490/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn337490/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/lib/gcc/x86_64-linux-gnu/8/include 
-internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn337490/build-llvm/lib/Target/X86 -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-07-20-043646-20380-1 -x c++ /build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp -faddrsig

/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86ISelLowering.h"
16#include "Utils/X86ShuffleDecode.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86ShuffleDecodeConstantPool.h"
23#include "X86TargetMachine.h"
24#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallBitVector.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/CodeGen/IntrinsicLowering.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineJumpTableInfo.h"
36#include "llvm/CodeGen/MachineModuleInfo.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/WinEHFuncInfo.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/DiagnosticInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalAlias.h"
47#include "llvm/IR/GlobalVariable.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Intrinsics.h"
50#include "llvm/MC/MCAsmInfo.h"
51#include "llvm/MC/MCContext.h"
52#include "llvm/MC/MCExpr.h"
53#include "llvm/MC/MCSymbol.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/MathExtras.h"
59#include "llvm/Target/TargetOptions.h"
60#include <algorithm>
61#include <bitset>
62#include <cctype>
63#include <numeric>
64using namespace llvm;
65
// Name under which this file's statistics are registered; it appears as the
// first field of the Statistic initializer shown below.
66#define DEBUG_TYPE "x86-isel"
67
// Declares the NumTailCalls statistic. The analyzer report shows this macro
// expanding to:
//   static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls",
//       "Number of tail calls", {0}, {false}}
68STATISTIC(NumTailCalls, "Number of tail calls");
69
/// Boolean command-line option "x86-experimental-vector-widening-legalization".
/// Initialized to false and registered with cl::Hidden. Per its description it
/// enables legalizing vector types by widening rather than promotion.
70static cl::opt<bool> ExperimentalVectorWideningLegalization(
71 "x86-experimental-vector-widening-legalization", cl::init(false),
72 cl::desc("Enable an experimental vector type legalization through widening "
73 "rather than promotion."),
74 cl::Hidden);
75
/// Integer command-line option "x86-experimental-pref-loop-alignment".
/// Initialized to 4 and registered with cl::Hidden. Per its description the
/// value is the number of trailing bits of the loop header PC that will be 0.
76static cl::opt<int> ExperimentalPrefLoopAlignment(
77 "x86-experimental-pref-loop-alignment", cl::init(4),
78 cl::desc("Sets the preferable loop alignment for experiments "
79 "(the last x86-experimental-pref-loop-alignment bits"
80 " of the loop header PC will be 0)."),
81 cl::Hidden);
82
/// Boolean command-line option "mul-constant-optimization". Initialized to
/// true and registered with cl::Hidden. Per its description it controls
/// replacing 'mul x, Const' with instructions such as SHIFT and LEA.
83static cl::opt<bool> MulConstantOptimization(
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden);
88
89/// Call this when the user attempts to do something unsupported, like
90/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
91/// report_fatal_error, so calling code should attempt to recover without
92/// crashing.
///
/// Builds a DiagnosticInfoUnsupported from the Function of DAG's current
/// MachineFunction, the text in Msg and the debug location of dl, and hands it
/// to diagnose() on the object returned by DAG.getContext(). Nothing is
/// returned to the caller.
93static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
94 const char *Msg) {
95 MachineFunction &MF = DAG.getMachineFunction();
// NOTE(review): the diagnostic is created as a temporary inside the diagnose()
// call. Presumably it may refer to Msg rather than copy it -- confirm before
// hoisting it into a named local.
96 DAG.getContext()->diagnose(
97 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
98}
99
100X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
101 const X86Subtarget &STI)
102 : TargetLowering(TM), Subtarget(STI) {
103 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
104 X86ScalarSSEf64 = Subtarget.hasSSE2();
105 X86ScalarSSEf32 = Subtarget.hasSSE1();
106 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
107
108 // Set up the TargetLowering object.
109
110 // X86 is weird. It always uses i8 for shift amounts and setcc results.
111 setBooleanContents(ZeroOrOneBooleanContent);
112 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
113 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
114
115 // For 64-bit, since we have so many registers, use the ILP scheduler.
116 // For 32-bit, use the register pressure specific scheduling.
117 // For Atom, always use ILP scheduling.
118 if (Subtarget.isAtom())
119 setSchedulingPreference(Sched::ILP);
120 else if (Subtarget.is64Bit())
121 setSchedulingPreference(Sched::ILP);
122 else
123 setSchedulingPreference(Sched::RegPressure);
124 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
125 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
126
127 // Bypass expensive divides and use cheaper ones.
128 if (TM.getOptLevel() >= CodeGenOpt::Default) {
129 if (Subtarget.hasSlowDivide32())
130 addBypassSlowDiv(32, 8);
131 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
132 addBypassSlowDiv(64, 32);
133 }
134
135 if (Subtarget.isTargetKnownWindowsMSVC() ||
136 Subtarget.isTargetWindowsItanium()) {
137 // Setup Windows compiler runtime calls.
138 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
139 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
140 setLibcallName(RTLIB::SREM_I64, "_allrem");
141 setLibcallName(RTLIB::UREM_I64, "_aullrem");
142 setLibcallName(RTLIB::MUL_I64, "_allmul");
143 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
144 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
145 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
146 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
147 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
148 }
149
150 if (Subtarget.isTargetDarwin()) {
151 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
152 setUseUnderscoreSetJmp(false);
153 setUseUnderscoreLongJmp(false);
154 } else if (Subtarget.isTargetWindowsGNU()) {
155 // MS runtime is weird: it exports _setjmp, but longjmp!
156 setUseUnderscoreSetJmp(true);
157 setUseUnderscoreLongJmp(false);
158 } else {
159 setUseUnderscoreSetJmp(true);
160 setUseUnderscoreLongJmp(true);
161 }
162
163 // Set up the register classes.
164 addRegisterClass(MVT::i8, &X86::GR8RegClass);
165 addRegisterClass(MVT::i16, &X86::GR16RegClass);
166 addRegisterClass(MVT::i32, &X86::GR32RegClass);
167 if (Subtarget.is64Bit())
168 addRegisterClass(MVT::i64, &X86::GR64RegClass);
169
170 for (MVT VT : MVT::integer_valuetypes())
171 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
172
173 // We don't accept any truncstore of integer registers.
174 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
175 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
176 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
177 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
178 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
179 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
180
181 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
182
183 // SETOEQ and SETUNE require checking two conditions.
184 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
185 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
186 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
187 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
188 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
189 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
190
191 // Integer absolute.
192 if (Subtarget.hasCMov()) {
193 setOperationAction(ISD::ABS , MVT::i16 , Custom);
194 setOperationAction(ISD::ABS , MVT::i32 , Custom);
195 if (Subtarget.is64Bit())
196 setOperationAction(ISD::ABS , MVT::i64 , Custom);
197 }
198
199 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
200 // operation.
201 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
202 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
203 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
204
205 if (Subtarget.is64Bit()) {
206 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
207 // f32/f64 are legal, f80 is custom.
208 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
209 else
210 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
211 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
212 } else if (!Subtarget.useSoftFloat()) {
213 // We have an algorithm for SSE2->double, and we turn this into a
214 // 64-bit FILD followed by conditional FADD for other targets.
215 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
216 // We have an algorithm for SSE2, and we turn this into a 64-bit
217 // FILD or VCVTUSI2SS/SD for other targets.
218 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
219 } else {
220 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
221 }
222
223 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
224 // this operation.
225 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
226 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
227
228 if (!Subtarget.useSoftFloat()) {
229 // SSE has no i16 to fp conversion, only i32.
230 if (X86ScalarSSEf32) {
231 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
232 // f32 and f64 cases are Legal, f80 case is not
233 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
234 } else {
235 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
236 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
237 }
238 } else {
239 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
240 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand);
241 }
242
243 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
244 // this operation.
245 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
246 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
247
248 if (!Subtarget.useSoftFloat()) {
249 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
250 // are Legal, f80 is custom lowered.
251 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
252 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
253
254 if (X86ScalarSSEf32) {
255 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
256 // f32 and f64 cases are Legal, f80 case is not
257 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
258 } else {
259 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
260 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
261 }
262 } else {
263 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
264 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
265 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
266 }
267
268 // Handle FP_TO_UINT by promoting the destination to a larger signed
269 // conversion.
270 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
271 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
272 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
273
274 if (Subtarget.is64Bit()) {
275 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
276 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
277 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
278 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
279 } else {
280 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
281 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
282 }
283 } else if (!Subtarget.useSoftFloat()) {
284 // Since AVX is a superset of SSE3, only check for SSE here.
285 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
286 // Expand FP_TO_UINT into a select.
287 // FIXME: We would like to use a Custom expander here eventually to do
288 // the optimal thing for SSE vs. the default expansion in the legalizer.
289 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
290 else
291 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
292 // With SSE3 we can use fisttpll to convert to a signed i64; without
293 // SSE, we're stuck with a fistpll.
294 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
295
296 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
297 }
298
299 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
300 if (!X86ScalarSSEf64) {
301 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
302 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
303 if (Subtarget.is64Bit()) {
304 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
305 // Without SSE, i64->f64 goes through memory.
306 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
307 }
308 } else if (!Subtarget.is64Bit())
309 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
310
311 // Scalar integer divide and remainder are lowered to use operations that
312 // produce two results, to match the available instructions. This exposes
313 // the two-result form to trivial CSE, which is able to combine x/y and x%y
314 // into a single instruction.
315 //
316 // Scalar integer multiply-high is also lowered to use two-result
317 // operations, to match the available instructions. However, plain multiply
318 // (low) operations are left as Legal, as there are single-result
319 // instructions for this in x86. Using the two-result multiply instructions
320 // when both high and low results are needed must be arranged by dagcombine.
321 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
322 setOperationAction(ISD::MULHS, VT, Expand);
323 setOperationAction(ISD::MULHU, VT, Expand);
324 setOperationAction(ISD::SDIV, VT, Expand);
325 setOperationAction(ISD::UDIV, VT, Expand);
326 setOperationAction(ISD::SREM, VT, Expand);
327 setOperationAction(ISD::UREM, VT, Expand);
328 }
329
330 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
331 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
332 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
333 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
334 setOperationAction(ISD::BR_CC, VT, Expand);
335 setOperationAction(ISD::SELECT_CC, VT, Expand);
336 }
337 if (Subtarget.is64Bit())
338 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
339 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
340 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
341 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
342 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
343
344 setOperationAction(ISD::FREM , MVT::f32 , Expand);
345 setOperationAction(ISD::FREM , MVT::f64 , Expand);
346 setOperationAction(ISD::FREM , MVT::f80 , Expand);
347 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
348
349 // Promote the i8 variants and force them on up to i32 which has a shorter
350 // encoding.
351 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
352 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
353 if (!Subtarget.hasBMI()) {
354 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
355 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
356 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
357 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
358 if (Subtarget.is64Bit()) {
359 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
360 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
361 }
362 }
363
364 if (Subtarget.hasLZCNT()) {
365 // When promoting the i8 variants, force them to i32 for a shorter
366 // encoding.
367 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
368 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
369 } else {
370 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
371 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
372 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
373 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
374 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
375 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
376 if (Subtarget.is64Bit()) {
377 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
378 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
379 }
380 }
381
382 // Special handling for half-precision floating point conversions.
383 // If we don't have F16C support, then lower half float conversions
384 // into library calls.
385 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
386 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
387 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
388 }
389
390 // There's never any support for operations beyond MVT::f32.
391 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
392 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
393 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
394 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
395
396 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
397 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
398 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
399 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
400 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
401 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
402
403 if (Subtarget.hasPOPCNT()) {
404 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
405 } else {
406 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
407 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
408 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
409 if (Subtarget.is64Bit())
410 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
411 }
412
413 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
414
415 if (!Subtarget.hasMOVBE())
416 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
417
418 // These should be promoted to a larger select which is supported.
419 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
420 // X86 wants to expand cmov itself.
421 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
422 setOperationAction(ISD::SELECT, VT, Custom);
423 setOperationAction(ISD::SETCC, VT, Custom);
424 }
425 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
426 if (VT == MVT::i64 && !Subtarget.is64Bit())
427 continue;
428 setOperationAction(ISD::SELECT, VT, Custom);
429 setOperationAction(ISD::SETCC, VT, Custom);
430 }
431
432 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
433 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
434 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
435
436 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
437 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
438 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
439 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
440 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
441 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
442 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
443 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
444
445 // Darwin ABI issue.
446 for (auto VT : { MVT::i32, MVT::i64 }) {
447 if (VT == MVT::i64 && !Subtarget.is64Bit())
448 continue;
449 setOperationAction(ISD::ConstantPool , VT, Custom);
450 setOperationAction(ISD::JumpTable , VT, Custom);
451 setOperationAction(ISD::GlobalAddress , VT, Custom);
452 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
453 setOperationAction(ISD::ExternalSymbol , VT, Custom);
454 setOperationAction(ISD::BlockAddress , VT, Custom);
455 }
456
457 // 64-bit shl, sra, srl (iff 32-bit x86)
458 for (auto VT : { MVT::i32, MVT::i64 }) {
459 if (VT == MVT::i64 && !Subtarget.is64Bit())
460 continue;
461 setOperationAction(ISD::SHL_PARTS, VT, Custom);
462 setOperationAction(ISD::SRA_PARTS, VT, Custom);
463 setOperationAction(ISD::SRL_PARTS, VT, Custom);
464 }
465
466 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
467 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
468
469 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
470
471 // Expand certain atomics
472 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
473 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
474 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
477 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
478 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
479 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
480 }
481
482 if (Subtarget.hasCmpxchg16b()) {
483 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
484 }
485
486 // FIXME - use subtarget debug flags
487 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
488 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
489 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
490 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
491 }
492
493 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
494 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
495
496 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
497 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
498
499 setOperationAction(ISD::TRAP, MVT::Other, Legal);
500 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
501
502 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
503 setOperationAction(ISD::VASTART , MVT::Other, Custom);
504 setOperationAction(ISD::VAEND , MVT::Other, Expand);
505 bool Is64Bit = Subtarget.is64Bit();
506 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
507 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
508
509 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
510 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
511
512 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
513
514 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
515 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
516 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
517
518 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
519 // f32 and f64 use SSE.
520 // Set up the FP register classes.
521 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
522 : &X86::FR32RegClass);
523 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
524 : &X86::FR64RegClass);
525
526 for (auto VT : { MVT::f32, MVT::f64 }) {
527 // Use ANDPD to simulate FABS.
528 setOperationAction(ISD::FABS, VT, Custom);
529
530 // Use XORP to simulate FNEG.
531 setOperationAction(ISD::FNEG, VT, Custom);
532
533 // Use ANDPD and ORPD to simulate FCOPYSIGN.
534 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
535
536 // We don't support sin/cos/fmod
537 setOperationAction(ISD::FSIN , VT, Expand);
538 setOperationAction(ISD::FCOS , VT, Expand);
539 setOperationAction(ISD::FSINCOS, VT, Expand);
540 }
541
542 // Lower this to MOVMSK plus an AND.
543 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
544 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
545
546 // Expand FP immediates into loads from the stack, except for the special
547 // cases we handle.
548 addLegalFPImmediate(APFloat(+0.0)); // xorpd
549 addLegalFPImmediate(APFloat(+0.0f)); // xorps
550 } else if (UseX87 && X86ScalarSSEf32) {
551 // Use SSE for f32, x87 for f64.
552 // Set up the FP register classes.
553 addRegisterClass(MVT::f32, &X86::FR32RegClass);
554 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
555
556 // Use ANDPS to simulate FABS.
557 setOperationAction(ISD::FABS , MVT::f32, Custom);
558
559 // Use XORP to simulate FNEG.
560 setOperationAction(ISD::FNEG , MVT::f32, Custom);
561
562 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
563
564 // Use ANDPS and ORPS to simulate FCOPYSIGN.
565 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
566 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
567
568 // We don't support sin/cos/fmod
569 setOperationAction(ISD::FSIN , MVT::f32, Expand);
570 setOperationAction(ISD::FCOS , MVT::f32, Expand);
571 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
572
573 // Special cases we handle for FP constants.
574 addLegalFPImmediate(APFloat(+0.0f)); // xorps
575 addLegalFPImmediate(APFloat(+0.0)); // FLD0
576 addLegalFPImmediate(APFloat(+1.0)); // FLD1
577 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
578 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
579
580 // Always expand sin/cos functions even though x87 has an instruction.
581 setOperationAction(ISD::FSIN , MVT::f64, Expand);
582 setOperationAction(ISD::FCOS , MVT::f64, Expand);
583 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
584 } else if (UseX87) {
585 // f32 and f64 in x87.
586 // Set up the FP register classes.
587 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
588 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
589
590 for (auto VT : { MVT::f32, MVT::f64 }) {
591 setOperationAction(ISD::UNDEF, VT, Expand);
592 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
593
594 // Always expand sin/cos functions even though x87 has an instruction.
595 setOperationAction(ISD::FSIN , VT, Expand);
596 setOperationAction(ISD::FCOS , VT, Expand);
597 setOperationAction(ISD::FSINCOS, VT, Expand);
598 }
599 addLegalFPImmediate(APFloat(+0.0)); // FLD0
600 addLegalFPImmediate(APFloat(+1.0)); // FLD1
601 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
602 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
603 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
604 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
605 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
606 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
607 }
608
609 // We don't support FMA.
610 setOperationAction(ISD::FMA, MVT::f64, Expand);
611 setOperationAction(ISD::FMA, MVT::f32, Expand);
612
613 // Long double always uses X87, except f128 in MMX.
614 if (UseX87) {
615 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
616 addRegisterClass(MVT::f128, &X86::VR128RegClass);
617 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
618 setOperationAction(ISD::FABS , MVT::f128, Custom);
619 setOperationAction(ISD::FNEG , MVT::f128, Custom);
620 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
621 }
622
623 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
624 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
625 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
626 {
627 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
628 addLegalFPImmediate(TmpFlt); // FLD0
629 TmpFlt.changeSign();
630 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
631
632 bool ignored;
633 APFloat TmpFlt2(+1.0);
634 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
635 &ignored);
636 addLegalFPImmediate(TmpFlt2); // FLD1
637 TmpFlt2.changeSign();
638 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
639 }
640
641 // Always expand sin/cos functions even though x87 has an instruction.
642 setOperationAction(ISD::FSIN , MVT::f80, Expand);
643 setOperationAction(ISD::FCOS , MVT::f80, Expand);
644 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
645
646 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
647 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
648 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
649 setOperationAction(ISD::FRINT, MVT::f80, Expand);
650 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
651 setOperationAction(ISD::FMA, MVT::f80, Expand);
652 }
653
654 // Always use a library call for pow.
655 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
656 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
657 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
658
659 setOperationAction(ISD::FLOG, MVT::f80, Expand);
660 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
661 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
662 setOperationAction(ISD::FEXP, MVT::f80, Expand);
663 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
664 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
665 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
666
667 // Some FP actions are always expanded for vector types.
668 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
669 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
670 setOperationAction(ISD::FSIN, VT, Expand);
671 setOperationAction(ISD::FSINCOS, VT, Expand);
672 setOperationAction(ISD::FCOS, VT, Expand);
673 setOperationAction(ISD::FREM, VT, Expand);
674 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
675 setOperationAction(ISD::FPOW, VT, Expand);
676 setOperationAction(ISD::FLOG, VT, Expand);
677 setOperationAction(ISD::FLOG2, VT, Expand);
678 setOperationAction(ISD::FLOG10, VT, Expand);
679 setOperationAction(ISD::FEXP, VT, Expand);
680 setOperationAction(ISD::FEXP2, VT, Expand);
681 }
682
683 // First set operation action for all vector types to either promote
684 // (for widening) or expand (for scalarization). Then we will selectively
685 // turn on ones that can be effectively codegen'd.
686 for (MVT VT : MVT::vector_valuetypes()) {
687 setOperationAction(ISD::SDIV, VT, Expand);
688 setOperationAction(ISD::UDIV, VT, Expand);
689 setOperationAction(ISD::SREM, VT, Expand);
690 setOperationAction(ISD::UREM, VT, Expand);
691 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
692 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
693 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
694 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
695 setOperationAction(ISD::FMA, VT, Expand);
696 setOperationAction(ISD::FFLOOR, VT, Expand);
697 setOperationAction(ISD::FCEIL, VT, Expand);
698 setOperationAction(ISD::FTRUNC, VT, Expand);
699 setOperationAction(ISD::FRINT, VT, Expand);
700 setOperationAction(ISD::FNEARBYINT, VT, Expand);
701 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
702 setOperationAction(ISD::MULHS, VT, Expand);
703 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
704 setOperationAction(ISD::MULHU, VT, Expand);
705 setOperationAction(ISD::SDIVREM, VT, Expand);
706 setOperationAction(ISD::UDIVREM, VT, Expand);
707 setOperationAction(ISD::CTPOP, VT, Expand);
708 setOperationAction(ISD::CTTZ, VT, Expand);
709 setOperationAction(ISD::CTLZ, VT, Expand);
710 setOperationAction(ISD::ROTL, VT, Expand);
711 setOperationAction(ISD::ROTR, VT, Expand);
712 setOperationAction(ISD::BSWAP, VT, Expand);
713 setOperationAction(ISD::SETCC, VT, Expand);
714 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
715 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
716 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
717 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
718 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
719 setOperationAction(ISD::TRUNCATE, VT, Expand);
720 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
721 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
722 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
723 setOperationAction(ISD::SELECT_CC, VT, Expand);
724 for (MVT InnerVT : MVT::vector_valuetypes()) {
725 setTruncStoreAction(InnerVT, VT, Expand);
726
727 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
728 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
729
730 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
731 // types, we have to deal with them whether we ask for Expansion or not.
732 // Setting Expand causes its own optimisation problems though, so leave
733 // them legal.
734 if (VT.getVectorElementType() == MVT::i1)
735 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
736
737 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
738 // split/scalarized right now.
739 if (VT.getVectorElementType() == MVT::f16)
740 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
741 }
742 }
743
744 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
745 // with -msoft-float, disable use of MMX as well.
746 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
747 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
748 // No operations on x86mmx supported, everything uses intrinsics.
749 }
750
751 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
752 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
753 : &X86::VR128RegClass);
754
755 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
756 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
757 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
758 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
759 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
760 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
761 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
762 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
763 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
764 }
765
766 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
767 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
768 : &X86::VR128RegClass);
769
770 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
771 // registers cannot be used even for integer operations.
772 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
773 : &X86::VR128RegClass);
774 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
775 : &X86::VR128RegClass);
776 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
777 : &X86::VR128RegClass);
778 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
779 : &X86::VR128RegClass);
780
781 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
782 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
783 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
784 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
785 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
786 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
787 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
788 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
789 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
790 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
791 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
792 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
793 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
794
795 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
796 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
797 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
798 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
799 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
800 }
801
802 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
803 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
804 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
805
806 // Provide custom widening for v2f32 setcc. This is really for VLX when
807 // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
808 // type legalization changing the result type to v4i1 during widening.
809 // It works fine for SSE2 and is probably faster so no need to qualify with
810 // VLX support.
811 setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
812
813 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
814 setOperationAction(ISD::SETCC, VT, Custom);
815 setOperationAction(ISD::CTPOP, VT, Custom);
816 setOperationAction(ISD::CTTZ, VT, Custom);
817
818 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
819 // setcc all the way to isel and prefer SETGT in some isel patterns.
820 setCondCodeAction(ISD::SETLT, VT, Custom);
821 setCondCodeAction(ISD::SETLE, VT, Custom);
822 }
823
824 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
825 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
826 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
827 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
828 setOperationAction(ISD::VSELECT, VT, Custom);
829 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
830 }
831
832 // We support custom legalizing of sext and anyext loads for specific
833 // memory vector types which we can load as a scalar (or sequence of
834 // scalars) and extend in-register to a legal 128-bit vector type. For sext
835 // loads these must work with a single scalar load.
836 for (MVT VT : MVT::integer_vector_valuetypes()) {
837 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
838 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
839 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
840 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
841 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
842 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
843 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
844 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
845 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
846 }
847
848 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
849 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
850 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
851 setOperationAction(ISD::VSELECT, VT, Custom);
852
853 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
854 continue;
855
856 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
857 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
858 }
859
860 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
861 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
862 setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
863 setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
864 setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
865 setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
866 setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
867 }
868
869 // Custom lower v2i64 and v2f64 selects.
870 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
871 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
872
873 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
874 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
875
876 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
877 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
878
879 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
880
881 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
882 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
883
884 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
885 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
886
887 for (MVT VT : MVT::fp_vector_valuetypes())
888 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
889
890 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
891 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
892 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
893 if (!Subtarget.hasAVX512())
894 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
895
896 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
897 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
898 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
899
900 // In the customized shift lowering, the legal v4i32/v2i64 cases
901 // in AVX2 will be recognized.
902 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
903 setOperationAction(ISD::SRL, VT, Custom);
904 setOperationAction(ISD::SHL, VT, Custom);
905 setOperationAction(ISD::SRA, VT, Custom);
906 }
907
908 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
909 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
910 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
911 }
912
913 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
914 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
915 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
916 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
917 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
918 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
919 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
920 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
921 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
922 }
923
924 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
925 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
926 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
927 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
928 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
929 setOperationAction(ISD::FRINT, RoundedTy, Legal);
930 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
931 }
932
933 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
934 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
935 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
936 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
937 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
938 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
939 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
940 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
941
942 // FIXME: Do we need to handle scalar-to-vector here?
943 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
944
945 // We directly match byte blends in the backend as they match the VSELECT
946 // condition form.
947 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
948
949 // SSE41 brings specific instructions for doing vector sign extend even in
950 // cases where we don't have SRA.
951 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
952 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
953 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
954 }
955
956 for (MVT VT : MVT::integer_vector_valuetypes()) {
957 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
958 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
959 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
960 }
961
962 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
963 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
964 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
965 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
966 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
967 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
968 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
969 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
970 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
971 }
972
973 // i8 vectors are custom because the source register and source
974 // memory operand types are not the same width.
975 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
976 }
977
978 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
979 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
980 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
981 setOperationAction(ISD::ROTL, VT, Custom);
982
983 // XOP can efficiently perform BITREVERSE with VPPERM.
984 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
985 setOperationAction(ISD::BITREVERSE, VT, Custom);
986
987 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
988 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
989 setOperationAction(ISD::BITREVERSE, VT, Custom);
990 }
991
992 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
993 bool HasInt256 = Subtarget.hasInt256();
994
995 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
996 : &X86::VR256RegClass);
997 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
998 : &X86::VR256RegClass);
999 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1000 : &X86::VR256RegClass);
1001 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1002 : &X86::VR256RegClass);
1003 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1004 : &X86::VR256RegClass);
1005 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1006 : &X86::VR256RegClass);
1007
1008 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1009 setOperationAction(ISD::FFLOOR, VT, Legal);
1010 setOperationAction(ISD::FCEIL, VT, Legal);
1011 setOperationAction(ISD::FTRUNC, VT, Legal);
1012 setOperationAction(ISD::FRINT, VT, Legal);
1013 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1014 setOperationAction(ISD::FNEG, VT, Custom);
1015 setOperationAction(ISD::FABS, VT, Custom);
1016 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1017 }
1018
1019 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1020 // even though v8i16 is a legal type.
1021 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1022 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1023 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1024
1025 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1026 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1027
1028 if (!Subtarget.hasAVX512())
1029 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1030
1031 for (MVT VT : MVT::fp_vector_valuetypes())
1032 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1033
1034 // In the customized shift lowering, the legal v8i32/v4i64 cases
1035 // in AVX2 will be recognized.
1036 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1037 setOperationAction(ISD::SRL, VT, Custom);
1038 setOperationAction(ISD::SHL, VT, Custom);
1039 setOperationAction(ISD::SRA, VT, Custom);
1040 }
1041
1042 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1043 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1044 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1045
1046 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1047 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1048 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1049
1050 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1051 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1052 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1053 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1054 }
1055
1056 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1057 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1058 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1059 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1060
1061 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1062 setOperationAction(ISD::SETCC, VT, Custom);
1063 setOperationAction(ISD::CTPOP, VT, Custom);
1064 setOperationAction(ISD::CTTZ, VT, Custom);
1065 setOperationAction(ISD::CTLZ, VT, Custom);
1066
1067 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1068 // setcc all the way to isel and prefer SETGT in some isel patterns.
1069 setCondCodeAction(ISD::SETLT, VT, Custom);
1070 setCondCodeAction(ISD::SETLE, VT, Custom);
1071 }
1072
1073 if (Subtarget.hasAnyFMA()) {
1074 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1075 MVT::v2f64, MVT::v4f64 })
1076 setOperationAction(ISD::FMA, VT, Legal);
1077 }
1078
1079 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1080 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1081 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1082 }
1083
1084 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1085 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1086 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1087 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1088
1089 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1090 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1091
1092 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1093 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1094 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1095 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1096
1097 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1098 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1099 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1100 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1101
1102 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1103 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1104 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1105 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1106 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1107 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1108 }
1109
1110 if (HasInt256) {
1111 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
1112 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
1113 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
1114
1115 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1116 // when we have a 256bit-wide blend with immediate.
1117 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1118
1119 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1120 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1121 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1122 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1123 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1124 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1125 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1126 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1127 }
1128 }
1129
1130 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1131 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1132 setOperationAction(ISD::MLOAD, VT, Legal);
1133 setOperationAction(ISD::MSTORE, VT, Legal);
1134 }
1135
1136 // Extract subvector is special because the value type
1137 // (result) is 128-bit but the source is 256-bit wide.
1138 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1139 MVT::v4f32, MVT::v2f64 }) {
1140 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1141 }
1142
1143 // Custom lower several nodes for 256-bit types.
1144 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1145 MVT::v8f32, MVT::v4f64 }) {
1146 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1147 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1148 setOperationAction(ISD::VSELECT, VT, Custom);
1149 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1150 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1151 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1152 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1153 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1154 }
1155
1156 if (HasInt256)
1157 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1158
1159 // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
1160 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1161 setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
1162 setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
1163 setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
1164 setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
1165 setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
1166 }
1167
1168 if (HasInt256) {
1169 // Custom legalize 2x32 to get a little better code.
1170 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1171 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1172
1173 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1174 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1175 setOperationAction(ISD::MGATHER, VT, Custom);
1176 }
1177 }
1178
1179 // This block controls legalization of the mask vector sizes that are
1180 // available with AVX512. 512-bit vectors are in a separate block controlled
1181 // by useAVX512Regs.
1182 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1183 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1184 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1185 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1186 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1187 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1188
1189 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1190 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1191 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1192
1193 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1194 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1195 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1196 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1197 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1198 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1199
1200 // There is no byte sized k-register load or store without AVX512DQ.
1201 if (!Subtarget.hasDQI()) {
1202 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1203 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1204 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1205 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1206
1207 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1208 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1209 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1210 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1211 }
1212
1213 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1214 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1215 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1216 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1217 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1218 }
1219
1220 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1221 setOperationAction(ISD::ADD, VT, Custom);
1222 setOperationAction(ISD::SUB, VT, Custom);
1223 setOperationAction(ISD::MUL, VT, Custom);
1224 setOperationAction(ISD::SETCC, VT, Custom);
1225 setOperationAction(ISD::SELECT, VT, Custom);
1226 setOperationAction(ISD::TRUNCATE, VT, Custom);
1227
1228 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1229 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1230 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1231 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1232 setOperationAction(ISD::VSELECT, VT, Expand);
1233 }
1234
1235 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1236 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1237 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1238 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
1239 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1240 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1241 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1242 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1243 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1244 }
1245
1246 // This block controls legalization for 512-bit operations with 32/64 bit
1247 // elements. 512-bits can be disabled based on prefer-vector-width and
1248 // required-vector-width function attributes.
1249 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1250 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1251 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1252 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1253 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1254
1255 for (MVT VT : MVT::fp_vector_valuetypes())
1256 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1257
1258 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1259 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1260 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1261 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1262 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1263 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1264 }
1265
1266 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1267 setOperationAction(ISD::FNEG, VT, Custom);
1268 setOperationAction(ISD::FABS, VT, Custom);
1269 setOperationAction(ISD::FMA, VT, Legal);
1270 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1271 }
1272
1273 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1274 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
1275 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
1276 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
1277 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1278 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
1279 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
1280 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
1281 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1282 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1283
1284 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1285 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1286 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1287 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1288 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1289
1290 if (!Subtarget.hasVLX()) {
1291 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1292 // to 512-bit rather than use the AVX2 instructions so that we can use
1293 // k-masks.
1294 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1295 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1296 setOperationAction(ISD::MLOAD, VT, Custom);
1297 setOperationAction(ISD::MSTORE, VT, Custom);
1298 }
1299 }
1300
1301 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1302 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1303 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1304 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1305 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1306 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1307 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1308 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1309
1310 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1311 setOperationAction(ISD::FFLOOR, VT, Legal);
1312 setOperationAction(ISD::FCEIL, VT, Legal);
1313 setOperationAction(ISD::FTRUNC, VT, Legal);
1314 setOperationAction(ISD::FRINT, VT, Legal);
1315 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1316 }
1317
1318 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
1319 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
1320
1321 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1322 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1323 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1324
1325 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1326 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1327 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1328 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1329
1330 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1331 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1332
1333 setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom);
1334 setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom);
1335
1336 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1337 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1338 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1339
1340 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1341 setOperationAction(ISD::SMAX, VT, Legal);
1342 setOperationAction(ISD::UMAX, VT, Legal);
1343 setOperationAction(ISD::SMIN, VT, Legal);
1344 setOperationAction(ISD::UMIN, VT, Legal);
1345 setOperationAction(ISD::ABS, VT, Legal);
1346 setOperationAction(ISD::SRL, VT, Custom);
1347 setOperationAction(ISD::SHL, VT, Custom);
1348 setOperationAction(ISD::SRA, VT, Custom);
1349 setOperationAction(ISD::CTPOP, VT, Custom);
1350 setOperationAction(ISD::CTTZ, VT, Custom);
1351 setOperationAction(ISD::ROTL, VT, Custom);
1352 setOperationAction(ISD::ROTR, VT, Custom);
1353 setOperationAction(ISD::SETCC, VT, Custom);
1354
1355 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1356 // setcc all the way to isel and prefer SETGT in some isel patterns.
1357 setCondCodeAction(ISD::SETLT, VT, Custom);
1358 setCondCodeAction(ISD::SETLE, VT, Custom);
1359 }
1360
1361 // Need to promote to 64-bit even though we have 32-bit masked instructions
1362 // because the IR optimizers rearrange bitcasts around logic ops leaving
1363 // too many variations to handle if we don't promote them.
1364 setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
1365 setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
1366 setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
1367
1368 if (Subtarget.hasDQI()) {
1369 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1370 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1371 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1372 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1373
1374 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1375 }
1376
1377 if (Subtarget.hasCDI()) {
1378 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1379 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1380 setOperationAction(ISD::CTLZ, VT, Legal);
1381 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
1382 }
1383 } // Subtarget.hasCDI()
1384
1385 if (Subtarget.hasVPOPCNTDQ()) {
1386 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1387 setOperationAction(ISD::CTPOP, VT, Legal);
1388 }
1389
1390 // Extract subvector is special because the value type
1391 // (result) is 256-bit but the source is 512-bit wide.
1392 // 128-bit was made Legal under AVX1.
1393 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1394 MVT::v8f32, MVT::v4f64 })
1395 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1396
1397 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1398 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1399 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1400 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1401 setOperationAction(ISD::VSELECT, VT, Custom);
1402 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1403 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1404 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1405 setOperationAction(ISD::MLOAD, VT, Legal);
1406 setOperationAction(ISD::MSTORE, VT, Legal);
1407 setOperationAction(ISD::MGATHER, VT, Custom);
1408 setOperationAction(ISD::MSCATTER, VT, Custom);
1409 }
1410 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1411 setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
1412 setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
1413 }
1414
1415 // Need to custom split v32i16/v64i8 bitcasts.
1416 if (!Subtarget.hasBWI()) {
1417 setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
1418 setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
1419 }
1420 }// has AVX-512
1421
1422 // This block controls legalization for operations that don't have
1423 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1424 // narrower widths.
1425 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1426 // These operations are handled on non-VLX by artificially widening in
1427 // isel patterns.
1428 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1429
1430 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1431 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1432 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1433 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1434 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1435
1436 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1437 setOperationAction(ISD::SMAX, VT, Legal);
1438 setOperationAction(ISD::UMAX, VT, Legal);
1439 setOperationAction(ISD::SMIN, VT, Legal);
1440 setOperationAction(ISD::UMIN, VT, Legal);
1441 setOperationAction(ISD::ABS, VT, Legal);
1442 }
1443
1444 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1445 setOperationAction(ISD::ROTL, VT, Custom);
1446 setOperationAction(ISD::ROTR, VT, Custom);
1447 }
1448
1449 // Custom legalize 2x32 to get a little better code.
1450 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1451 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1452
1453 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1454 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1455 setOperationAction(ISD::MSCATTER, VT, Custom);
1456
1457 if (Subtarget.hasDQI()) {
1458 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1459 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1460 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1461 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1462 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1463
1464 setOperationAction(ISD::MUL, VT, Legal);
1465 }
1466 }
1467
1468 if (Subtarget.hasCDI()) {
1469 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1470 setOperationAction(ISD::CTLZ, VT, Legal);
1471 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
1472 }
1473 } // Subtarget.hasCDI()
1474
1475 if (Subtarget.hasVPOPCNTDQ()) {
1476 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1477 setOperationAction(ISD::CTPOP, VT, Legal);
1478 }
1479 }
1480
1481 // This block control legalization of v32i1/v64i1 which are available with
1482 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1483 // useBWIRegs.
1484 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1485 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1486 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1487
1488 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1489 setOperationAction(ISD::ADD, VT, Custom);
1490 setOperationAction(ISD::SUB, VT, Custom);
1491 setOperationAction(ISD::MUL, VT, Custom);
1492 setOperationAction(ISD::VSELECT, VT, Expand);
1493
1494 setOperationAction(ISD::TRUNCATE, VT, Custom);
1495 setOperationAction(ISD::SETCC, VT, Custom);
1496 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1497 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1498 setOperationAction(ISD::SELECT, VT, Custom);
1499 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1500 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1501 }
1502
1503 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1504 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1505 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1506 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1507 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1508 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1509
1510 // Extends from v32i1 masks to 256-bit vectors.
1511 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1512 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1513 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1514 }
1515
1516 // This block controls legalization for v32i16 and v64i8. 512-bits can be
1517 // disabled based on prefer-vector-width and required-vector-width function
1518 // attributes.
1519 if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1520 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1521 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1522
1523 // Extends from v64i1 masks to 512-bit vectors.
1524 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1525 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1526 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1527
1528 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1529 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1530 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1531 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1532 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1533 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1534 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1535 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1536 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1537 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1538 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1539 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1540 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1541 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1542 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1543 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1544 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1545 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1546 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1547 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1548 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1549 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1550 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1551
1552 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1553
1554 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1555
1556 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1557 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1558 setOperationAction(ISD::VSELECT, VT, Custom);
1559 setOperationAction(ISD::ABS, VT, Legal);
1560 setOperationAction(ISD::SRL, VT, Custom);
1561 setOperationAction(ISD::SHL, VT, Custom);
1562 setOperationAction(ISD::SRA, VT, Custom);
1563 setOperationAction(ISD::MLOAD, VT, Legal);
1564 setOperationAction(ISD::MSTORE, VT, Legal);
1565 setOperationAction(ISD::CTPOP, VT, Custom);
1566 setOperationAction(ISD::CTTZ, VT, Custom);
1567 setOperationAction(ISD::CTLZ, VT, Custom);
1568 setOperationAction(ISD::SMAX, VT, Legal);
1569 setOperationAction(ISD::UMAX, VT, Legal);
1570 setOperationAction(ISD::SMIN, VT, Legal);
1571 setOperationAction(ISD::UMIN, VT, Legal);
1572 setOperationAction(ISD::SETCC, VT, Custom);
1573
1574 setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
1575 setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
1576 setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
1577 }
1578
1579 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1580 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1581 }
1582
1583 if (Subtarget.hasBITALG()) {
1584 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1585 setOperationAction(ISD::CTPOP, VT, Legal);
1586 }
1587 }
1588
1589 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1590 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1591 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1592 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1593 }
1594
1595 // These operations are handled on non-VLX by artificially widening in
1596 // isel patterns.
1597 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1598
1599 if (Subtarget.hasBITALG()) {
1600 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1601 setOperationAction(ISD::CTPOP, VT, Legal);
1602 }
1603 }
1604
1605 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1606 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1607 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1608 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1609 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1610 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1611
1612 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1613 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1614 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1615 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1616 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1617
1618 if (Subtarget.hasDQI()) {
1619 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1620 // v2f32 UINT_TO_FP is already custom under SSE2.
1621 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1622 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 1623, __extension__ __PRETTY_FUNCTION__))
1623 "Unexpected operation action!")(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 1623, __extension__ __PRETTY_FUNCTION__))
;
1624 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1625 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1626 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1627 }
1628
1629 if (Subtarget.hasBWI()) {
1630 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1631 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1632 }
1633 }
1634
1635 // We want to custom lower some of our intrinsics.
1636 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1637 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1638 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1639 if (!Subtarget.is64Bit()) {
1640 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1641 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1642 }
1643
1644 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1645 // handle type legalization for these operations here.
1646 //
1647 // FIXME: We really should do custom legalization for addition and
1648 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1649 // than generic legalization for 64-bit multiplication-with-overflow, though.
1650 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1651 if (VT == MVT::i64 && !Subtarget.is64Bit())
1652 continue;
1653 // Add/Sub/Mul with overflow operations are custom lowered.
1654 setOperationAction(ISD::SADDO, VT, Custom);
1655 setOperationAction(ISD::UADDO, VT, Custom);
1656 setOperationAction(ISD::SSUBO, VT, Custom);
1657 setOperationAction(ISD::USUBO, VT, Custom);
1658 setOperationAction(ISD::SMULO, VT, Custom);
1659 setOperationAction(ISD::UMULO, VT, Custom);
1660
1661 // Support carry in as value rather than glue.
1662 setOperationAction(ISD::ADDCARRY, VT, Custom);
1663 setOperationAction(ISD::SUBCARRY, VT, Custom);
1664 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1665 }
1666
1667 if (!Subtarget.is64Bit()) {
1668 // These libcalls are not available in 32-bit.
1669 setLibcallName(RTLIB::SHL_I128, nullptr);
1670 setLibcallName(RTLIB::SRL_I128, nullptr);
1671 setLibcallName(RTLIB::SRA_I128, nullptr);
1672 setLibcallName(RTLIB::MUL_I128, nullptr);
1673 }
1674
1675 // Combine sin / cos into _sincos_stret if it is available.
1676 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1677 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1678 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1679 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1680 }
1681
1682 if (Subtarget.isTargetWin64()) {
1683 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1684 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1685 setOperationAction(ISD::SREM, MVT::i128, Custom);
1686 setOperationAction(ISD::UREM, MVT::i128, Custom);
1687 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1688 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1689 }
1690
1691 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1692 // is. We should promote the value to 64-bits to solve this.
1693 // This is what the CRT headers do - `fmodf` is an inline header
1694 // function casting to f64 and calling `fmod`.
1695 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1696 Subtarget.isTargetWindowsItanium()))
1697 for (ISD::NodeType Op :
1698 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1699 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1700 if (isOperationExpand(Op, MVT::f32))
1701 setOperationAction(Op, MVT::f32, Promote);
1702
1703 // We have target-specific dag combine patterns for the following nodes:
1704 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1705 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1706 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1707 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1708 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1709 setTargetDAGCombine(ISD::BITCAST);
1710 setTargetDAGCombine(ISD::VSELECT);
1711 setTargetDAGCombine(ISD::SELECT);
1712 setTargetDAGCombine(ISD::SHL);
1713 setTargetDAGCombine(ISD::SRA);
1714 setTargetDAGCombine(ISD::SRL);
1715 setTargetDAGCombine(ISD::OR);
1716 setTargetDAGCombine(ISD::AND);
1717 setTargetDAGCombine(ISD::ADD);
1718 setTargetDAGCombine(ISD::FADD);
1719 setTargetDAGCombine(ISD::FSUB);
1720 setTargetDAGCombine(ISD::FNEG);
1721 setTargetDAGCombine(ISD::FMA);
1722 setTargetDAGCombine(ISD::FMINNUM);
1723 setTargetDAGCombine(ISD::FMAXNUM);
1724 setTargetDAGCombine(ISD::SUB);
1725 setTargetDAGCombine(ISD::LOAD);
1726 setTargetDAGCombine(ISD::MLOAD);
1727 setTargetDAGCombine(ISD::STORE);
1728 setTargetDAGCombine(ISD::MSTORE);
1729 setTargetDAGCombine(ISD::TRUNCATE);
1730 setTargetDAGCombine(ISD::ZERO_EXTEND);
1731 setTargetDAGCombine(ISD::ANY_EXTEND);
1732 setTargetDAGCombine(ISD::SIGN_EXTEND);
1733 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1734 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1735 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1736 setTargetDAGCombine(ISD::SINT_TO_FP);
1737 setTargetDAGCombine(ISD::UINT_TO_FP);
1738 setTargetDAGCombine(ISD::SETCC);
1739 setTargetDAGCombine(ISD::MUL);
1740 setTargetDAGCombine(ISD::XOR);
1741 setTargetDAGCombine(ISD::MSCATTER);
1742 setTargetDAGCombine(ISD::MGATHER);
1743
1744 computeRegisterProperties(Subtarget.getRegisterInfo());
1745
1746 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1747 MaxStoresPerMemsetOptSize = 8;
1748 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1749 MaxStoresPerMemcpyOptSize = 4;
1750 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1751 MaxStoresPerMemmoveOptSize = 4;
1752
1753 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1754 // that needs to benchmarked and balanced with the potential use of vector
1755 // load/store types (PR33329, PR33914).
1756 MaxLoadsPerMemcmp = 2;
1757 MaxLoadsPerMemcmpOptSize = 2;
1758
1759 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1760 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1761
1762 // An out-of-order CPU can speculatively execute past a predictable branch,
1763 // but a conditional move could be stalled by an expensive earlier operation.
1764 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1765 EnableExtLdPromotion = true;
1766 setPrefFunctionAlignment(4); // 2^4 bytes.
1767
1768 verifyIntrinsicTables();
1769}
1770
1771// This has so far only been implemented for 64-bit MachO.
1772bool X86TargetLowering::useLoadStackGuardNode() const {
1773 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1774}
1775
1776bool X86TargetLowering::useStackGuardXorFP() const {
1777 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
1778 return Subtarget.getTargetTriple().isOSMSVCRT();
1779}
1780
1781SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1782 const SDLoc &DL) const {
1783 EVT PtrTy = getPointerTy(DAG.getDataLayout());
1784 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
1785 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
1786 return SDValue(Node, 0);
1787}
1788
1789TargetLoweringBase::LegalizeTypeAction
1790X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1791 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1792 return TypeSplitVector;
1793
1794 if (ExperimentalVectorWideningLegalization &&
1795 VT.getVectorNumElements() != 1 &&
1796 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1797 return TypeWidenVector;
1798
1799 return TargetLoweringBase::getPreferredVectorAction(VT);
1800}
1801
1802MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1803 EVT VT) const {
1804 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1805 return MVT::v32i8;
1806 return TargetLowering::getRegisterTypeForCallingConv(Context, VT);
1807}
1808
1809unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1810 EVT VT) const {
1811 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1812 return 1;
1813 return TargetLowering::getNumRegistersForCallingConv(Context, VT);
1814}
1815
1816EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1817 LLVMContext& Context,
1818 EVT VT) const {
1819 if (!VT.isVector())
1820 return MVT::i8;
1821
1822 if (Subtarget.hasAVX512()) {
1823 const unsigned NumElts = VT.getVectorNumElements();
1824
1825 // Figure out what this type will be legalized to.
1826 EVT LegalVT = VT;
1827 while (getTypeAction(Context, LegalVT) != TypeLegal)
1828 LegalVT = getTypeToTransformTo(Context, LegalVT);
1829
1830 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
1831 if (LegalVT.getSimpleVT().is512BitVector())
1832 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1833
1834 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
1835 // If we legalized to less than a 512-bit vector, then we will use a vXi1
1836 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
1837 // vXi16/vXi8.
1838 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
1839 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
1840 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1841 }
1842 }
1843
1844 return VT.changeVectorElementTypeToInteger();
1845}
1846
1847/// Helper for getByValTypeAlignment to determine
1848/// the desired ByVal argument alignment.
1849static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1850 if (MaxAlign == 16)
1851 return;
1852 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1853 if (VTy->getBitWidth() == 128)
1854 MaxAlign = 16;
1855 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1856 unsigned EltAlign = 0;
1857 getMaxByValAlign(ATy->getElementType(), EltAlign);
1858 if (EltAlign > MaxAlign)
1859 MaxAlign = EltAlign;
1860 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1861 for (auto *EltTy : STy->elements()) {
1862 unsigned EltAlign = 0;
1863 getMaxByValAlign(EltTy, EltAlign);
1864 if (EltAlign > MaxAlign)
1865 MaxAlign = EltAlign;
1866 if (MaxAlign == 16)
1867 break;
1868 }
1869 }
1870}
1871
1872/// Return the desired alignment for ByVal aggregate
1873/// function arguments in the caller parameter area. For X86, aggregates
1874/// that contain SSE vectors are placed at 16-byte boundaries while the rest
1875/// are at 4-byte boundaries.
1876unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1877 const DataLayout &DL) const {
1878 if (Subtarget.is64Bit()) {
1879 // Max of 8 and alignment of type.
1880 unsigned TyAlign = DL.getABITypeAlignment(Ty);
1881 if (TyAlign > 8)
1882 return TyAlign;
1883 return 8;
1884 }
1885
1886 unsigned Align = 4;
1887 if (Subtarget.hasSSE1())
1888 getMaxByValAlign(Ty, Align);
1889 return Align;
1890}
1891
1892/// Returns the target specific optimal type for load
1893/// and store operations as a result of memset, memcpy, and memmove
1894/// lowering. If DstAlign is zero that means it's safe to destination
1895/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1896/// means there isn't a need to check it against alignment requirement,
1897/// probably because the source does not need to be loaded. If 'IsMemset' is
1898/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1899/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1900/// source is constant so it does not need to be loaded.
1901/// It returns EVT::Other if the type should be determined using generic
1902/// target-independent logic.
1903EVT
1904X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1905 unsigned DstAlign, unsigned SrcAlign,
1906 bool IsMemset, bool ZeroMemset,
1907 bool MemcpyStrSrc,
1908 MachineFunction &MF) const {
1909 const Function &F = MF.getFunction();
1910 if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) {
1911 if (Size >= 16 &&
1912 (!Subtarget.isUnalignedMem16Slow() ||
1913 ((DstAlign == 0 || DstAlign >= 16) &&
1914 (SrcAlign == 0 || SrcAlign >= 16)))) {
1915 // FIXME: Check if unaligned 32-byte accesses are slow.
1916 if (Size >= 32 && Subtarget.hasAVX()) {
1917 // Although this isn't a well-supported type for AVX1, we'll let
1918 // legalization and shuffle lowering produce the optimal codegen. If we
1919 // choose an optimal type with a vector element larger than a byte,
1920 // getMemsetStores() may create an intermediate splat (using an integer
1921 // multiply) before we splat as a vector.
1922 return MVT::v32i8;
1923 }
1924 if (Subtarget.hasSSE2())
1925 return MVT::v16i8;
1926 // TODO: Can SSE1 handle a byte vector?
1927 if (Subtarget.hasSSE1())
1928 return MVT::v4f32;
1929 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
1930 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
1931 // Do not use f64 to lower memcpy if source is string constant. It's
1932 // better to use i32 to avoid the loads.
1933 // Also, do not use f64 to lower memset unless this is a memset of zeros.
1934 // The gymnastics of splatting a byte value into an XMM register and then
1935 // only using 8-byte stores (because this is a CPU with slow unaligned
1936 // 16-byte accesses) makes that a loser.
1937 return MVT::f64;
1938 }
1939 }
1940 // This is a compromise. If we reach here, unaligned accesses may be slow on
1941 // this target. However, creating smaller, aligned accesses could be even
1942 // slower and would certainly be a lot more code.
1943 if (Subtarget.is64Bit() && Size >= 8)
1944 return MVT::i64;
1945 return MVT::i32;
1946}
1947
1948bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1949 if (VT == MVT::f32)
1950 return X86ScalarSSEf32;
1951 else if (VT == MVT::f64)
1952 return X86ScalarSSEf64;
1953 return true;
1954}
1955
1956bool
1957X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1958 unsigned,
1959 unsigned,
1960 bool *Fast) const {
1961 if (Fast) {
1962 switch (VT.getSizeInBits()) {
1963 default:
1964 // 8-byte and under are always assumed to be fast.
1965 *Fast = true;
1966 break;
1967 case 128:
1968 *Fast = !Subtarget.isUnalignedMem16Slow();
1969 break;
1970 case 256:
1971 *Fast = !Subtarget.isUnalignedMem32Slow();
1972 break;
1973 // TODO: What about AVX-512 (512-bit) accesses?
1974 }
1975 }
1976 // Misaligned accesses of any size are always allowed.
1977 return true;
1978}
1979
1980/// Return the entry encoding for a jump table in the
1981/// current function. The returned value is a member of the
1982/// MachineJumpTableInfo::JTEntryKind enum.
1983unsigned X86TargetLowering::getJumpTableEncoding() const {
1984 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1985 // symbol.
1986 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
1987 return MachineJumpTableInfo::EK_Custom32;
1988
1989 // Otherwise, use the normal jump table encoding heuristics.
1990 return TargetLowering::getJumpTableEncoding();
1991}
1992
1993bool X86TargetLowering::useSoftFloat() const {
1994 return Subtarget.useSoftFloat();
1995}
1996
1997void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
1998 ArgListTy &Args) const {
1999
2000 // Only relabel X86-32 for C / Stdcall CCs.
2001 if (Subtarget.is64Bit())
2002 return;
2003 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2004 return;
2005 unsigned ParamRegs = 0;
2006 if (auto *M = MF->getFunction().getParent())
2007 ParamRegs = M->getNumberRegisterParameters();
2008
2009 // Mark the first N int arguments as having reg
2010 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2011 Type *T = Args[Idx].Ty;
2012 if (T->isIntOrPtrTy())
2013 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2014 unsigned numRegs = 1;
2015 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2016 numRegs = 2;
2017 if (ParamRegs < numRegs)
2018 return;
2019 ParamRegs -= numRegs;
2020 Args[Idx].IsInReg = true;
2021 }
2022 }
2023}
2024
2025const MCExpr *
2026X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2027 const MachineBasicBlock *MBB,
2028 unsigned uid,MCContext &Ctx) const{
2029 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())(static_cast <bool> (isPositionIndependent() &&
Subtarget.isPICStyleGOT()) ? void (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2029, __extension__ __PRETTY_FUNCTION__))
;
2030 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2031 // entries.
2032 return MCSymbolRefExpr::create(MBB->getSymbol(),
2033 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2034}
2035
2036/// Returns relocation base for the given PIC jumptable.
2037SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2038 SelectionDAG &DAG) const {
2039 if (!Subtarget.is64Bit())
2040 // This doesn't have SDLoc associated with it, but is not really the
2041 // same as a Register.
2042 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2043 getPointerTy(DAG.getDataLayout()));
2044 return Table;
2045}
2046
2047/// This returns the relocation base for the given PIC jumptable,
2048/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2049const MCExpr *X86TargetLowering::
2050getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2051 MCContext &Ctx) const {
2052 // X86-64 uses RIP relative addressing based on the jump table label.
2053 if (Subtarget.isPICStyleRIPRel())
2054 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2055
2056 // Otherwise, the reference is relative to the PIC base.
2057 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2058}
2059
2060std::pair<const TargetRegisterClass *, uint8_t>
2061X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2062 MVT VT) const {
2063 const TargetRegisterClass *RRC = nullptr;
2064 uint8_t Cost = 1;
2065 switch (VT.SimpleTy) {
2066 default:
2067 return TargetLowering::findRepresentativeClass(TRI, VT);
2068 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2069 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2070 break;
2071 case MVT::x86mmx:
2072 RRC = &X86::VR64RegClass;
2073 break;
2074 case MVT::f32: case MVT::f64:
2075 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2076 case MVT::v4f32: case MVT::v2f64:
2077 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2078 case MVT::v8f32: case MVT::v4f64:
2079 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2080 case MVT::v16f32: case MVT::v8f64:
2081 RRC = &X86::VR128XRegClass;
2082 break;
2083 }
2084 return std::make_pair(RRC, Cost);
2085}
2086
2087unsigned X86TargetLowering::getAddressSpace() const {
2088 if (Subtarget.is64Bit())
2089 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2090 return 256;
2091}
2092
2093static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2094 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2095 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2096}
2097
2098static Constant* SegmentOffset(IRBuilder<> &IRB,
2099 unsigned Offset, unsigned AddressSpace) {
2100 return ConstantExpr::getIntToPtr(
2101 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2102 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2103}
2104
2105Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2106 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2107 // tcbhead_t; use it instead of the usual global variable (see
2108 // sysdeps/{i386,x86_64}/nptl/tls.h)
2109 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2110 if (Subtarget.isTargetFuchsia()) {
2111 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2112 return SegmentOffset(IRB, 0x10, getAddressSpace());
2113 } else {
2114 // %fs:0x28, unless we're using a Kernel code model, in which case
2115 // it's %gs:0x28. gs:0x14 on i386.
2116 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2117 return SegmentOffset(IRB, Offset, getAddressSpace());
2118 }
2119 }
2120
2121 return TargetLowering::getIRStackGuard(IRB);
2122}
2123
2124void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2125 // MSVC CRT provides functionalities for stack protection.
2126 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2127 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2128 // MSVC CRT has a global variable holding security cookie.
2129 M.getOrInsertGlobal("__security_cookie",
2130 Type::getInt8PtrTy(M.getContext()));
2131
2132 // MSVC CRT has a function to validate security cookie.
2133 auto *SecurityCheckCookie = cast<Function>(
2134 M.getOrInsertFunction("__security_check_cookie",
2135 Type::getVoidTy(M.getContext()),
2136 Type::getInt8PtrTy(M.getContext())));
2137 SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
2138 SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
2139 return;
2140 }
2141 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2142 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2143 return;
2144 TargetLowering::insertSSPDeclarations(M);
2145}
2146
2147Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2148 // MSVC CRT has a global variable holding security cookie.
2149 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2150 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2151 return M.getGlobalVariable("__security_cookie");
2152 }
2153 return TargetLowering::getSDagStackGuard(M);
2154}
2155
2156Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2157 // MSVC CRT has a function to validate security cookie.
2158 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2159 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2160 return M.getFunction("__security_check_cookie");
2161 }
2162 return TargetLowering::getSSPStackGuardCheck(M);
2163}
2164
2165Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2166 if (Subtarget.getTargetTriple().isOSContiki())
2167 return getDefaultSafeStackPointerLocation(IRB, false);
2168
2169 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2170 // definition of TLS_SLOT_SAFESTACK in
2171 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2172 if (Subtarget.isTargetAndroid()) {
2173 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2174 // %gs:0x24 on i386
2175 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2176 return SegmentOffset(IRB, Offset, getAddressSpace());
2177 }
2178
2179 // Fuchsia is similar.
2180 if (Subtarget.isTargetFuchsia()) {
2181 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2182 return SegmentOffset(IRB, 0x18, getAddressSpace());
2183 }
2184
2185 return TargetLowering::getSafeStackPointerLocation(IRB);
2186}
2187
2188bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2189 unsigned DestAS) const {
2190 assert(SrcAS != DestAS && "Expected different address spaces!")(static_cast <bool> (SrcAS != DestAS && "Expected different address spaces!"
) ? void (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2190, __extension__ __PRETTY_FUNCTION__))
;
2191
2192 return SrcAS < 256 && DestAS < 256;
2193}
2194
2195//===----------------------------------------------------------------------===//
2196// Return Value Calling Convention Implementation
2197//===----------------------------------------------------------------------===//
2198
2199#include "X86GenCallingConv.inc"
2200
2201bool X86TargetLowering::CanLowerReturn(
2202 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2203 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2204 SmallVector<CCValAssign, 16> RVLocs;
2205 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2206 return CCInfo.CheckReturn(Outs, RetCC_X86);
2207}
2208
2209const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2210 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2211 return ScratchRegs;
2212}
2213
2214/// Lowers masks values (v*i1) to the local register values
2215/// \returns DAG node after lowering to register type
2216static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2217 const SDLoc &Dl, SelectionDAG &DAG) {
2218 EVT ValVT = ValArg.getValueType();
2219
2220 if (ValVT == MVT::v1i1)
2221 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2222 DAG.getIntPtrConstant(0, Dl));
2223
2224 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2225 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2226 // Two stage lowering might be required
2227 // bitcast: v8i1 -> i8 / v16i1 -> i16
2228 // anyextend: i8 -> i32 / i16 -> i32
2229 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2230 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2231 if (ValLoc == MVT::i32)
2232 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2233 return ValToCopy;
2234 }
2235
2236 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2237 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2238 // One stage lowering is required
2239 // bitcast: v32i1 -> i32 / v64i1 -> i64
2240 return DAG.getBitcast(ValLoc, ValArg);
2241 }
2242
2243 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2244}
2245
2246/// Breaks v64i1 value into two registers and adds the new node to the DAG
2247static void Passv64i1ArgInRegs(
2248 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2249 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2250 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2251 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")(static_cast <bool> (Subtarget.hasBWI() && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2251, __extension__ __PRETTY_FUNCTION__))
;
2252 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2252, __extension__ __PRETTY_FUNCTION__))
;
2253 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")(static_cast <bool> (Arg.getValueType() == MVT::i64 &&
"Expecting 64 bit value") ? void (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2253, __extension__ __PRETTY_FUNCTION__))
;
2254 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2255, __extension__ __PRETTY_FUNCTION__))
2255 "The value should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2255, __extension__ __PRETTY_FUNCTION__))
;
2256
2257 // Before splitting the value we cast it to i64
2258 Arg = DAG.getBitcast(MVT::i64, Arg);
2259
2260 // Splitting the value into two i32 types
2261 SDValue Lo, Hi;
2262 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2263 DAG.getConstant(0, Dl, MVT::i32));
2264 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2265 DAG.getConstant(1, Dl, MVT::i32));
2266
2267 // Attach the two i32 types into corresponding registers
2268 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2269 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2270}
2271
2272SDValue
2273X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2274 bool isVarArg,
2275 const SmallVectorImpl<ISD::OutputArg> &Outs,
2276 const SmallVectorImpl<SDValue> &OutVals,
2277 const SDLoc &dl, SelectionDAG &DAG) const {
2278 MachineFunction &MF = DAG.getMachineFunction();
2279 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2280
2281 // In some cases we need to disable registers from the default CSR list.
2282 // For example, when they are used for argument passing.
2283 bool ShouldDisableCalleeSavedRegister =
2284 CallConv == CallingConv::X86_RegCall ||
2285 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2286
2287 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2288 report_fatal_error("X86 interrupts may not return any value");
2289
2290 SmallVector<CCValAssign, 16> RVLocs;
2291 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2292 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2293
2294 SDValue Flag;
2295 SmallVector<SDValue, 6> RetOps;
2296 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2297 // Operand #1 = Bytes To Pop
2298 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2299 MVT::i32));
2300
2301 // Copy the result values into the output registers.
2302 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2303 ++I, ++OutsIndex) {
2304 CCValAssign &VA = RVLocs[I];
2305 assert(VA.isRegLoc() && "Can only return in registers!")(static_cast <bool> (VA.isRegLoc() && "Can only return in registers!"
) ? void (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2305, __extension__ __PRETTY_FUNCTION__))
;
2306
2307 // Add the register to the CalleeSaveDisableRegs list.
2308 if (ShouldDisableCalleeSavedRegister)
2309 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2310
2311 SDValue ValToCopy = OutVals[OutsIndex];
2312 EVT ValVT = ValToCopy.getValueType();
2313
2314 // Promote values to the appropriate types.
2315 if (VA.getLocInfo() == CCValAssign::SExt)
2316 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2317 else if (VA.getLocInfo() == CCValAssign::ZExt)
2318 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2319 else if (VA.getLocInfo() == CCValAssign::AExt) {
2320 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2321 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2322 else
2323 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2324 }
2325 else if (VA.getLocInfo() == CCValAssign::BCvt)
2326 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2327
2328 assert(VA.getLocInfo() != CCValAssign::FPExt &&(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2329, __extension__ __PRETTY_FUNCTION__))
2329 "Unexpected FP-extend for return value.")(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2329, __extension__ __PRETTY_FUNCTION__))
;
2330
2331 // If this is x86-64, and we disabled SSE, we can't return FP values,
2332 // or SSE or MMX vectors.
2333 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2334 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2335 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2336 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2337 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2338 } else if (ValVT == MVT::f64 &&
2339 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2340 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2341 // llvm-gcc has never done it right and no one has noticed, so this
2342 // should be OK for now.
2343 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2344 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2345 }
2346
2347 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2348 // the RET instruction and handled by the FP Stackifier.
2349 if (VA.getLocReg() == X86::FP0 ||
2350 VA.getLocReg() == X86::FP1) {
2351 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2352 // change the value to the FP stack register class.
2353 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2354 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2355 RetOps.push_back(ValToCopy);
2356 // Don't emit a copytoreg.
2357 continue;
2358 }
2359
2360 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2361 // which is returned in RAX / RDX.
2362 if (Subtarget.is64Bit()) {
2363 if (ValVT == MVT::x86mmx) {
2364 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2365 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2366 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2367 ValToCopy);
2368 // If we don't have SSE2 available, convert to v4f32 so the generated
2369 // register is legal.
2370 if (!Subtarget.hasSSE2())
2371 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2372 }
2373 }
2374 }
2375
2376 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2377
2378 if (VA.needsCustom()) {
2379 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2380, __extension__ __PRETTY_FUNCTION__))
2380 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2380, __extension__ __PRETTY_FUNCTION__))
;
2381
2382 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2383 Subtarget);
2384
2385 assert(2 == RegsToPass.size() &&(static_cast <bool> (2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? void (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2386, __extension__ __PRETTY_FUNCTION__))
2386 "Expecting two registers after Pass64BitArgInRegs")(static_cast <bool> (2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? void (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2386, __extension__ __PRETTY_FUNCTION__))
;
2387
2388 // Add the second register to the CalleeSaveDisableRegs list.
2389 if (ShouldDisableCalleeSavedRegister)
2390 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2391 } else {
2392 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2393 }
2394
2395 // Add nodes to the DAG and add the values into the RetOps list
2396 for (auto &Reg : RegsToPass) {
2397 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2398 Flag = Chain.getValue(1);
2399 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2400 }
2401 }
2402
2403 // Swift calling convention does not require we copy the sret argument
2404 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2405
2406 // All x86 ABIs require that for returning structs by value we copy
2407 // the sret argument into %rax/%eax (depending on ABI) for the return.
2408 // We saved the argument into a virtual register in the entry block,
2409 // so now we copy the value out and into %rax/%eax.
2410 //
2411 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2412 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2413 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2414 // either case FuncInfo->setSRetReturnReg() will have been called.
2415 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2416 // When we have both sret and another return value, we should use the
2417 // original Chain stored in RetOps[0], instead of the current Chain updated
2418 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2419
2420 // For the case of sret and another return value, we have
2421 // Chain_0 at the function entry
2422 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2423 // If we use Chain_1 in getCopyFromReg, we will have
2424 // Val = getCopyFromReg(Chain_1)
2425 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2426
2427 // getCopyToReg(Chain_0) will be glued together with
2428 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2429 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2430 // Data dependency from Unit B to Unit A due to usage of Val in
2431 // getCopyToReg(Chain_1, Val)
2432 // Chain dependency from Unit A to Unit B
2433
2434 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2435 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2436 getPointerTy(MF.getDataLayout()));
2437
2438 unsigned RetValReg
2439 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2440 X86::RAX : X86::EAX;
2441 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2442 Flag = Chain.getValue(1);
2443
2444 // RAX/EAX now acts like a return value.
2445 RetOps.push_back(
2446 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2447
2448 // Add the returned register to the CalleeSaveDisableRegs list.
2449 if (ShouldDisableCalleeSavedRegister)
2450 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2451 }
2452
2453 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2454 const MCPhysReg *I =
2455 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2456 if (I) {
2457 for (; *I; ++I) {
2458 if (X86::GR64RegClass.contains(*I))
2459 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2460 else
2461 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2461)
;
2462 }
2463 }
2464
2465 RetOps[0] = Chain; // Update chain.
2466
2467 // Add the flag if we have it.
2468 if (Flag.getNode())
2469 RetOps.push_back(Flag);
2470
2471 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2472 if (CallConv == CallingConv::X86_INTR)
2473 opcode = X86ISD::IRET;
2474 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2475}
2476
2477bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2478 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2479 return false;
2480
2481 SDValue TCChain = Chain;
2482 SDNode *Copy = *N->use_begin();
2483 if (Copy->getOpcode() == ISD::CopyToReg) {
2484 // If the copy has a glue operand, we conservatively assume it isn't safe to
2485 // perform a tail call.
2486 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2487 return false;
2488 TCChain = Copy->getOperand(0);
2489 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2490 return false;
2491
2492 bool HasRet = false;
2493 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2494 UI != UE; ++UI) {
2495 if (UI->getOpcode() != X86ISD::RET_FLAG)
2496 return false;
2497 // If we are returning more than one value, we can definitely
2498 // not make a tail call see PR19530
2499 if (UI->getNumOperands() > 4)
2500 return false;
2501 if (UI->getNumOperands() == 4 &&
2502 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2503 return false;
2504 HasRet = true;
2505 }
2506
2507 if (!HasRet)
2508 return false;
2509
2510 Chain = TCChain;
2511 return true;
2512}
2513
2514EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2515 ISD::NodeType ExtendKind) const {
2516 MVT ReturnMVT = MVT::i32;
2517
2518 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2519 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2520 // The ABI does not require i1, i8 or i16 to be extended.
2521 //
2522 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2523 // always extending i8/i16 return values, so keep doing that for now.
2524 // (PR26665).
2525 ReturnMVT = MVT::i8;
2526 }
2527
2528 EVT MinVT = getRegisterType(Context, ReturnMVT);
2529 return VT.bitsLT(MinVT) ? MinVT : VT;
2530}
2531
2532/// Reads two 32 bit registers and creates a 64 bit mask value.
2533/// \param VA The current 32 bit value that need to be assigned.
2534/// \param NextVA The next 32 bit value that need to be assigned.
2535/// \param Root The parent DAG node.
2536/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2537/// glue purposes. In the case the DAG is already using
2538/// physical register instead of virtual, we should glue
2539/// our new SDValue to InFlag SDvalue.
2540/// \return a new SDvalue of size 64bit.
2541static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2542 SDValue &Root, SelectionDAG &DAG,
2543 const SDLoc &Dl, const X86Subtarget &Subtarget,
2544 SDValue *InFlag = nullptr) {
2545 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(static_cast <bool> ((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2545, __extension__ __PRETTY_FUNCTION__))
;
2546 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2546, __extension__ __PRETTY_FUNCTION__))
;
2547 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2548, __extension__ __PRETTY_FUNCTION__))
2548 "Expecting first location of 64 bit width type")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2548, __extension__ __PRETTY_FUNCTION__))
;
2549 assert(NextVA.getValVT() == VA.getValVT() &&(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2550, __extension__ __PRETTY_FUNCTION__))
2550 "The locations should have the same type")(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2550, __extension__ __PRETTY_FUNCTION__))
;
2551 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2552, __extension__ __PRETTY_FUNCTION__))
2552 "The values should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2552, __extension__ __PRETTY_FUNCTION__))
;
2553
2554 SDValue Lo, Hi;
2555 unsigned Reg;
2556 SDValue ArgValueLo, ArgValueHi;
2557
2558 MachineFunction &MF = DAG.getMachineFunction();
2559 const TargetRegisterClass *RC = &X86::GR32RegClass;
2560
2561 // Read a 32 bit value from the registers.
2562 if (nullptr == InFlag) {
2563 // When no physical register is present,
2564 // create an intermediate virtual register.
2565 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2566 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2567 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2568 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2569 } else {
2570 // When a physical register is available read the value from it and glue
2571 // the reads together.
2572 ArgValueLo =
2573 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2574 *InFlag = ArgValueLo.getValue(2);
2575 ArgValueHi =
2576 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2577 *InFlag = ArgValueHi.getValue(2);
2578 }
2579
2580 // Convert the i32 type into v32i1 type.
2581 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2582
2583 // Convert the i32 type into v32i1 type.
2584 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2585
2586 // Concatenate the two values together.
2587 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2588}
2589
2590/// The function will lower a register of various sizes (8/16/32/64)
2591/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2592/// \returns a DAG node contains the operand after lowering to mask type.
2593static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2594 const EVT &ValLoc, const SDLoc &Dl,
2595 SelectionDAG &DAG) {
2596 SDValue ValReturned = ValArg;
2597
2598 if (ValVT == MVT::v1i1)
2599 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2600
2601 if (ValVT == MVT::v64i1) {
2602 // In 32 bit machine, this case is handled by getv64i1Argument
2603 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")(static_cast <bool> (ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? void (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2603, __extension__ __PRETTY_FUNCTION__))
;
2604 // In 64 bit machine, There is no need to truncate the value only bitcast
2605 } else {
2606 MVT maskLen;
2607 switch (ValVT.getSimpleVT().SimpleTy) {
2608 case MVT::v8i1:
2609 maskLen = MVT::i8;
2610 break;
2611 case MVT::v16i1:
2612 maskLen = MVT::i16;
2613 break;
2614 case MVT::v32i1:
2615 maskLen = MVT::i32;
2616 break;
2617 default:
2618 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2618)
;
2619 }
2620
2621 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2622 }
2623 return DAG.getBitcast(ValVT, ValReturned);
2624}
2625
2626/// Lower the result values of a call into the
2627/// appropriate copies out of appropriate physical registers.
2628///
2629SDValue X86TargetLowering::LowerCallResult(
2630 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2631 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2632 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2633 uint32_t *RegMask) const {
2634
2635 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2636 // Assign locations to each value returned by this call.
2637 SmallVector<CCValAssign, 16> RVLocs;
2638 bool Is64Bit = Subtarget.is64Bit();
2639 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2640 *DAG.getContext());
2641 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2642
2643 // Copy all of the result registers out of their specified physreg.
2644 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2645 ++I, ++InsIndex) {
2646 CCValAssign &VA = RVLocs[I];
2647 EVT CopyVT = VA.getLocVT();
2648
2649 // In some calling conventions we need to remove the used registers
2650 // from the register mask.
2651 if (RegMask) {
2652 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2653 SubRegs.isValid(); ++SubRegs)
2654 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2655 }
2656
2657 // If this is x86-64, and we disabled SSE, we can't return FP values
2658 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2659 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2660 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2661 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2662 }
2663
2664 // If we prefer to use the value in xmm registers, copy it out as f80 and
2665 // use a truncate to move it from fp stack reg to xmm reg.
2666 bool RoundAfterCopy = false;
2667 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2668 isScalarFPTypeInSSEReg(VA.getValVT())) {
2669 if (!Subtarget.hasX87())
2670 report_fatal_error("X87 register return with X87 disabled");
2671 CopyVT = MVT::f80;
2672 RoundAfterCopy = (CopyVT != VA.getLocVT());
2673 }
2674
2675 SDValue Val;
2676 if (VA.needsCustom()) {
2677 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2678, __extension__ __PRETTY_FUNCTION__))
2678 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2678, __extension__ __PRETTY_FUNCTION__))
;
2679 Val =
2680 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2681 } else {
2682 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2683 .getValue(1);
2684 Val = Chain.getValue(0);
2685 InFlag = Chain.getValue(2);
2686 }
2687
2688 if (RoundAfterCopy)
2689 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2690 // This truncation won't change the value.
2691 DAG.getIntPtrConstant(1, dl));
2692
2693 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2694 if (VA.getValVT().isVector() &&
2695 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2696 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2697 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2698 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2699 } else
2700 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2701 }
2702
2703 InVals.push_back(Val);
2704 }
2705
2706 return Chain;
2707}
2708
2709//===----------------------------------------------------------------------===//
2710// C & StdCall & Fast Calling Convention implementation
2711//===----------------------------------------------------------------------===//
2712// The StdCall calling convention is the standard for many Windows API
2713// routines. It differs from the C calling convention only slightly: the
2714// callee cleans up the stack, not the caller. Symbols are also decorated
2715// in a convention-specific way. It doesn't support any vector arguments.
2716// For info on the fast calling convention, see the Fast Calling Convention
2717// (tail call) implementation LowerX86_32FastCCCallTo.
2718
2719/// StructReturnType - Classifies how a struct-return (sret) argument is
2720/// passed, as computed by callIsStructReturn and argsAreStructReturn.
2721enum StructReturnType {
2722 NotStructReturn, // No arguments, or the first argument is not sret.
2723 RegStructReturn, // First argument is sret and is inreg, or IsMCU is set.
2724 StackStructReturn // First argument is sret and neither of the above holds.
2725};
2726static StructReturnType
2727callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
2728 if (Outs.empty())
2729 return NotStructReturn;
2730
2731 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2732 if (!Flags.isSRet())
2733 return NotStructReturn;
2734 if (Flags.isInReg() || IsMCU)
2735 return RegStructReturn;
2736 return StackStructReturn;
2737}
2738
2739/// Determines whether a function uses struct return semantics.
2740static StructReturnType
2741argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
2742 if (Ins.empty())
2743 return NotStructReturn;
2744
2745 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2746 if (!Flags.isSRet())
2747 return NotStructReturn;
2748 if (Flags.isInReg() || IsMCU)
2749 return RegStructReturn;
2750 return StackStructReturn;
2751}
2752
2753/// Make a copy of an aggregate at address specified by "Src" to address
2754/// "Dst" with size and alignment information specified by the specific
2755/// parameter attribute. The copy will be passed as a byval function parameter.
2756static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2757 SDValue Chain, ISD::ArgFlagsTy Flags,
2758 SelectionDAG &DAG, const SDLoc &dl) {
2759 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2760
2761 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2762 /*isVolatile*/false, /*AlwaysInline=*/true,
2763 /*isTailCall*/false,
2764 MachinePointerInfo(), MachinePointerInfo());
2765}
2766
2767/// Return true if the calling convention is one that we can guarantee TCO for.
2768static bool canGuaranteeTCO(CallingConv::ID CC) {
2769 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2770 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2771 CC == CallingConv::HHVM);
2772}
2773
2774/// Return true if we might ever do TCO for calls with this calling convention.
2775static bool mayTailCallThisCC(CallingConv::ID CC) {
2776 switch (CC) {
2777 // C calling conventions:
2778 case CallingConv::C:
2779 case CallingConv::Win64:
2780 case CallingConv::X86_64_SysV:
2781 // Callee pop conventions:
2782 case CallingConv::X86_ThisCall:
2783 case CallingConv::X86_StdCall:
2784 case CallingConv::X86_VectorCall:
2785 case CallingConv::X86_FastCall:
2786 return true;
2787 default:
2788 return canGuaranteeTCO(CC);
2789 }
2790}
2791
2792/// Return true if the function is being made into a tailcall target by
2793/// changing its ABI.
2794static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2795 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2796}
2797
2798bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2799 auto Attr =
2800 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2801 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2802 return false;
2803
2804 ImmutableCallSite CS(CI);
2805 CallingConv::ID CalleeCC = CS.getCallingConv();
2806 if (!mayTailCallThisCC(CalleeCC))
2807 return false;
2808
2809 return true;
2810}
2811
2812SDValue
2813X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2814 const SmallVectorImpl<ISD::InputArg> &Ins,
2815 const SDLoc &dl, SelectionDAG &DAG,
2816 const CCValAssign &VA,
2817 MachineFrameInfo &MFI, unsigned i) const {
2818 // Create the nodes corresponding to a load from this parameter slot.
2819 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2820 bool AlwaysUseMutable = shouldGuaranteeTCO(
2821 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2822 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2823 EVT ValVT;
2824 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2825
2826 // If value is passed by pointer we have address passed instead of the value
2827 // itself. No need to extend if the mask value and location share the same
2828 // absolute size.
2829 bool ExtendedInMem =
2830 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2831 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2832
2833 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2834 ValVT = VA.getLocVT();
2835 else
2836 ValVT = VA.getValVT();
2837
2838 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2839 // taken by a return address.
2840 int Offset = 0;
2841 if (CallConv == CallingConv::X86_INTR) {
2842 // X86 interrupts may take one or two arguments.
2843 // On the stack there will be no return address as in regular call.
2844 // Offset of last argument need to be set to -4/-8 bytes.
2845 // Where offset of the first argument out of two, should be set to 0 bytes.
2846 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2847 if (Subtarget.is64Bit() && Ins.size() == 2) {
2848 // The stack pointer needs to be realigned for 64 bit handlers with error
2849 // code, so the argument offset changes by 8 bytes.
2850 Offset += 8;
2851 }
2852 }
2853
2854 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2855 // changed with more analysis.
2856 // In case of tail call optimization mark all arguments mutable. Since they
2857 // could be overwritten by lowering of arguments in case of a tail call.
2858 if (Flags.isByVal()) {
2859 unsigned Bytes = Flags.getByValSize();
2860 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2861
2862 // FIXME: For now, all byval parameter objects are marked as aliasing. This
2863 // can be improved with deeper analysis.
2864 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
2865 /*isAliased=*/true);
2866 // Adjust SP offset of interrupt parameter.
2867 if (CallConv == CallingConv::X86_INTR) {
2868 MFI.setObjectOffset(FI, Offset);
2869 }
2870 return DAG.getFrameIndex(FI, PtrVT);
2871 }
2872
2873 // This is an argument in memory. We might be able to perform copy elision.
2874 if (Flags.isCopyElisionCandidate()) {
2875 EVT ArgVT = Ins[i].ArgVT;
2876 SDValue PartAddr;
2877 if (Ins[i].PartOffset == 0) {
2878 // If this is a one-part value or the first part of a multi-part value,
2879 // create a stack object for the entire argument value type and return a
2880 // load from our portion of it. This assumes that if the first part of an
2881 // argument is in memory, the rest will also be in memory.
2882 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
2883 /*Immutable=*/false);
2884 PartAddr = DAG.getFrameIndex(FI, PtrVT);
2885 return DAG.getLoad(
2886 ValVT, dl, Chain, PartAddr,
2887 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2888 } else {
2889 // This is not the first piece of an argument in memory. See if there is
2890 // already a fixed stack object including this offset. If so, assume it
2891 // was created by the PartOffset == 0 branch above and create a load from
2892 // the appropriate offset into it.
2893 int64_t PartBegin = VA.getLocMemOffset();
2894 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
2895 int FI = MFI.getObjectIndexBegin();
2896 for (; MFI.isFixedObjectIndex(FI); ++FI) {
2897 int64_t ObjBegin = MFI.getObjectOffset(FI);
2898 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
2899 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
2900 break;
2901 }
2902 if (MFI.isFixedObjectIndex(FI)) {
2903 SDValue Addr =
2904 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
2905 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
2906 return DAG.getLoad(
2907 ValVT, dl, Chain, Addr,
2908 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
2909 Ins[i].PartOffset));
2910 }
2911 }
2912 }
2913
2914 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2915 VA.getLocMemOffset(), isImmutable);
2916
2917 // Set SExt or ZExt flag.
2918 if (VA.getLocInfo() == CCValAssign::ZExt) {
2919 MFI.setObjectZExt(FI, true);
2920 } else if (VA.getLocInfo() == CCValAssign::SExt) {
2921 MFI.setObjectSExt(FI, true);
2922 }
2923
2924 // Adjust SP offset of interrupt parameter.
2925 if (CallConv == CallingConv::X86_INTR) {
2926 MFI.setObjectOffset(FI, Offset);
2927 }
2928
2929 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2930 SDValue Val = DAG.getLoad(
2931 ValVT, dl, Chain, FIN,
2932 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2933 return ExtendedInMem
2934 ? (VA.getValVT().isVector()
2935 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
2936 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
2937 : Val;
2938}
2939
2940// FIXME: Get this from tablegen.
2941static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2942 const X86Subtarget &Subtarget) {
2943 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2943, __extension__ __PRETTY_FUNCTION__))
;
2944
2945 if (Subtarget.isCallingConvWin64(CallConv)) {
2946 static const MCPhysReg GPR64ArgRegsWin64[] = {
2947 X86::RCX, X86::RDX, X86::R8, X86::R9
2948 };
2949 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2950 }
2951
2952 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2953 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2954 };
2955 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2956}
2957
2958// FIXME: Get this from tablegen.
2959static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2960 CallingConv::ID CallConv,
2961 const X86Subtarget &Subtarget) {
2962 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2962, __extension__ __PRETTY_FUNCTION__))
;
2963 if (Subtarget.isCallingConvWin64(CallConv)) {
2964 // The XMM registers which might contain var arg parameters are shadowed
2965 // in their paired GPR. So we only need to save the GPR to their home
2966 // slots.
2967 // TODO: __vectorcall will change this.
2968 return None;
2969 }
2970
2971 const Function &F = MF.getFunction();
2972 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
2973 bool isSoftFloat = Subtarget.useSoftFloat();
2974 assert(!(isSoftFloat && NoImplicitFloatOps) &&(static_cast <bool> (!(isSoftFloat && NoImplicitFloatOps
) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2975, __extension__ __PRETTY_FUNCTION__))
2975 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(isSoftFloat && NoImplicitFloatOps
) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 2975, __extension__ __PRETTY_FUNCTION__))
;
2976 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
2977 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2978 // registers.
2979 return None;
2980
2981 static const MCPhysReg XMMArgRegs64Bit[] = {
2982 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2983 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2984 };
2985 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2986}
2987
2988#ifndef NDEBUG
2989static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
2990 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
2991 [](const CCValAssign &A, const CCValAssign &B) -> bool {
2992 return A.getValNo() < B.getValNo();
2993 });
2994}
2995#endif
2996
2997SDValue X86TargetLowering::LowerFormalArguments(
2998 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2999 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3000 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3001 MachineFunction &MF = DAG.getMachineFunction();
3002 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3003 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3004
3005 const Function &F = MF.getFunction();
3006 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3007 F.getName() == "main")
3008 FuncInfo->setForceFramePointer(true);
3009
3010 MachineFrameInfo &MFI = MF.getFrameInfo();
3011 bool Is64Bit = Subtarget.is64Bit();
3012 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3013
3014 assert((static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3016, __extension__ __PRETTY_FUNCTION__))
3015 !(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3016, __extension__ __PRETTY_FUNCTION__))
3016 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3016, __extension__ __PRETTY_FUNCTION__))
;
3017
3018 if (CallConv == CallingConv::X86_INTR) {
3019 bool isLegal = Ins.size() == 1 ||
3020 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
3021 (!Is64Bit && Ins[1].VT == MVT::i32)));
3022 if (!isLegal)
3023 report_fatal_error("X86 interrupts may take one or two arguments");
3024 }
3025
3026 // Assign locations to all of the incoming arguments.
3027 SmallVector<CCValAssign, 16> ArgLocs;
3028 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3029
3030 // Allocate shadow area for Win64.
3031 if (IsWin64)
3032 CCInfo.AllocateStack(32, 8);
3033
3034 CCInfo.AnalyzeArguments(Ins, CC_X86);
3035
3036 // In vectorcall calling convention a second pass is required for the HVA
3037 // types.
3038 if (CallingConv::X86_VectorCall == CallConv) {
3039 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3040 }
3041
3042 // The next loop assumes that the locations are in the same order of the
3043 // input arguments.
3044 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3045, __extension__ __PRETTY_FUNCTION__))
3045 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3045, __extension__ __PRETTY_FUNCTION__))
;
3046
3047 SDValue ArgValue;
3048 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3049 ++I, ++InsIndex) {
3050 assert(InsIndex < Ins.size() && "Invalid Ins index")(static_cast <bool> (InsIndex < Ins.size() &&
"Invalid Ins index") ? void (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3050, __extension__ __PRETTY_FUNCTION__))
;
3051 CCValAssign &VA = ArgLocs[I];
3052
3053 if (VA.isRegLoc()) {
3054 EVT RegVT = VA.getLocVT();
3055 if (VA.needsCustom()) {
3056 assert((static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3058, __extension__ __PRETTY_FUNCTION__))
3057 VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3058, __extension__ __PRETTY_FUNCTION__))
3058 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3058, __extension__ __PRETTY_FUNCTION__))
;
3059
3060 // v64i1 values, in regcall calling convention, that are
3061 // compiled to 32 bit arch, are split up into two registers.
3062 ArgValue =
3063 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3064 } else {
3065 const TargetRegisterClass *RC;
3066 if (RegVT == MVT::i8)
3067 RC = &X86::GR8RegClass;
3068 else if (RegVT == MVT::i16)
3069 RC = &X86::GR16RegClass;
3070 else if (RegVT == MVT::i32)
3071 RC = &X86::GR32RegClass;
3072 else if (Is64Bit && RegVT == MVT::i64)
3073 RC = &X86::GR64RegClass;
3074 else if (RegVT == MVT::f32)
3075 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3076 else if (RegVT == MVT::f64)
3077 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3078 else if (RegVT == MVT::f80)
3079 RC = &X86::RFP80RegClass;
3080 else if (RegVT == MVT::f128)
3081 RC = &X86::VR128RegClass;
3082 else if (RegVT.is512BitVector())
3083 RC = &X86::VR512RegClass;
3084 else if (RegVT.is256BitVector())
3085 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3086 else if (RegVT.is128BitVector())
3087 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3088 else if (RegVT == MVT::x86mmx)
3089 RC = &X86::VR64RegClass;
3090 else if (RegVT == MVT::v1i1)
3091 RC = &X86::VK1RegClass;
3092 else if (RegVT == MVT::v8i1)
3093 RC = &X86::VK8RegClass;
3094 else if (RegVT == MVT::v16i1)
3095 RC = &X86::VK16RegClass;
3096 else if (RegVT == MVT::v32i1)
3097 RC = &X86::VK32RegClass;
3098 else if (RegVT == MVT::v64i1)
3099 RC = &X86::VK64RegClass;
3100 else
3101 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3101)
;
3102
3103 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3104 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3105 }
3106
3107 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3108 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3109 // right size.
3110 if (VA.getLocInfo() == CCValAssign::SExt)
3111 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3112 DAG.getValueType(VA.getValVT()));
3113 else if (VA.getLocInfo() == CCValAssign::ZExt)
3114 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3115 DAG.getValueType(VA.getValVT()));
3116 else if (VA.getLocInfo() == CCValAssign::BCvt)
3117 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3118
3119 if (VA.isExtInLoc()) {
3120 // Handle MMX values passed in XMM regs.
3121 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3122 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3123 else if (VA.getValVT().isVector() &&
3124 VA.getValVT().getScalarType() == MVT::i1 &&
3125 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3126 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3127 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3128 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3129 } else
3130 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3131 }
3132 } else {
3133 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3133, __extension__ __PRETTY_FUNCTION__))
;
3134 ArgValue =
3135 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3136 }
3137
3138 // If value is passed via pointer - do a load.
3139 if (VA.getLocInfo() == CCValAssign::Indirect)
3140 ArgValue =
3141 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3142
3143 InVals.push_back(ArgValue);
3144 }
3145
3146 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3147 // Swift calling convention does not require we copy the sret argument
3148 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3149 if (CallConv == CallingConv::Swift)
3150 continue;
3151
3152 // All x86 ABIs require that for returning structs by value we copy the
3153 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3154 // the argument into a virtual register so that we can access it from the
3155 // return points.
3156 if (Ins[I].Flags.isSRet()) {
3157 unsigned Reg = FuncInfo->getSRetReturnReg();
3158 if (!Reg) {
3159 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3160 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3161 FuncInfo->setSRetReturnReg(Reg);
3162 }
3163 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3164 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3165 break;
3166 }
3167 }
3168
3169 unsigned StackSize = CCInfo.getNextStackOffset();
3170 // Align stack specially for tail calls.
3171 if (shouldGuaranteeTCO(CallConv,
3172 MF.getTarget().Options.GuaranteedTailCallOpt))
3173 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3174
3175 // If the function takes variable number of arguments, make a frame index for
3176 // the start of the first vararg value... for expansion of llvm.va_start. We
3177 // can skip this if there are no va_start calls.
3178 if (MFI.hasVAStart() &&
3179 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3180 CallConv != CallingConv::X86_ThisCall))) {
3181 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3182 }
3183
3184 // Figure out if XMM registers are in use.
3185 assert(!(Subtarget.useSoftFloat() &&(static_cast <bool> (!(Subtarget.useSoftFloat() &&
F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3187, __extension__ __PRETTY_FUNCTION__))
3186 F.hasFnAttribute(Attribute::NoImplicitFloat)) &&(static_cast <bool> (!(Subtarget.useSoftFloat() &&
F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3187, __extension__ __PRETTY_FUNCTION__))
3187 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(Subtarget.useSoftFloat() &&
F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3187, __extension__ __PRETTY_FUNCTION__))
;
3188
3189 // 64-bit calling conventions support varargs and register parameters, so we
3190 // have to do extra work to spill them in the prologue.
3191 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3192 // Find the first unallocated argument registers.
3193 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3194 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3195 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3196 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3197 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3198, __extension__ __PRETTY_FUNCTION__))
3198 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3198, __extension__ __PRETTY_FUNCTION__))
;
3199
3200 // Gather all the live in physical registers.
3201 SmallVector<SDValue, 6> LiveGPRs;
3202 SmallVector<SDValue, 8> LiveXMMRegs;
3203 SDValue ALVal;
3204 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3205 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3206 LiveGPRs.push_back(
3207 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3208 }
3209 if (!ArgXMMs.empty()) {
3210 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3211 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3212 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3213 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3214 LiveXMMRegs.push_back(
3215 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3216 }
3217 }
3218
3219 if (IsWin64) {
3220 // Get to the caller-allocated home save location. Add 8 to account
3221 // for the return address.
3222 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3223 FuncInfo->setRegSaveFrameIndex(
3224 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3225 // Fixup to set vararg frame on shadow area (4 x i64).
3226 if (NumIntRegs < 4)
3227 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3228 } else {
3229 // For X86-64, if there are vararg parameters that are passed via
3230 // registers, then we must store them to their spots on the stack so
3231 // they may be loaded by dereferencing the result of va_next.
3232 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3233 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3234 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3235 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3236 }
3237
3238 // Store the integer parameter registers.
3239 SmallVector<SDValue, 8> MemOps;
3240 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3241 getPointerTy(DAG.getDataLayout()));
3242 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3243 for (SDValue Val : LiveGPRs) {
3244 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3245 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3246 SDValue Store =
3247 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3248 MachinePointerInfo::getFixedStack(
3249 DAG.getMachineFunction(),
3250 FuncInfo->getRegSaveFrameIndex(), Offset));
3251 MemOps.push_back(Store);
3252 Offset += 8;
3253 }
3254
3255 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3256 // Now store the XMM (fp + vector) parameter registers.
3257 SmallVector<SDValue, 12> SaveXMMOps;
3258 SaveXMMOps.push_back(Chain);
3259 SaveXMMOps.push_back(ALVal);
3260 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3261 FuncInfo->getRegSaveFrameIndex(), dl));
3262 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3263 FuncInfo->getVarArgsFPOffset(), dl));
3264 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3265 LiveXMMRegs.end());
3266 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3267 MVT::Other, SaveXMMOps));
3268 }
3269
3270 if (!MemOps.empty())
3271 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3272 }
3273
3274 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3275 // Find the largest legal vector type.
3276 MVT VecVT = MVT::Other;
3277 // FIXME: Only some x86_32 calling conventions support AVX512.
3278 if (Subtarget.hasAVX512() &&
3279 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3280 CallConv == CallingConv::Intel_OCL_BI)))
3281 VecVT = MVT::v16f32;
3282 else if (Subtarget.hasAVX())
3283 VecVT = MVT::v8f32;
3284 else if (Subtarget.hasSSE2())
3285 VecVT = MVT::v4f32;
3286
3287 // We forward some GPRs and some vector types.
3288 SmallVector<MVT, 2> RegParmTypes;
3289 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3290 RegParmTypes.push_back(IntVT);
3291 if (VecVT != MVT::Other)
3292 RegParmTypes.push_back(VecVT);
3293
3294 // Compute the set of forwarded registers. The rest are scratch.
3295 SmallVectorImpl<ForwardedRegister> &Forwards =
3296 FuncInfo->getForwardedMustTailRegParms();
3297 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3298
3299 // Conservatively forward AL on x86_64, since it might be used for varargs.
3300 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3301 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3302 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3303 }
3304
3305 // Copy all forwards from physical to virtual registers.
3306 for (ForwardedRegister &F : Forwards) {
3307 // FIXME: Can we use a less constrained schedule?
3308 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3309 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
3310 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
3311 }
3312 }
3313
3314 // Some CCs need callee pop.
3315 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3316 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3317 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3318 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3319 // X86 interrupts must pop the error code (and the alignment padding) if
3320 // present.
3321 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3322 } else {
3323 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3324 // If this is an sret function, the return should pop the hidden pointer.
3325 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3326 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3327 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3328 FuncInfo->setBytesToPopOnReturn(4);
3329 }
3330
3331 if (!Is64Bit) {
3332 // RegSaveFrameIndex is X86-64 only.
3333 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3334 if (CallConv == CallingConv::X86_FastCall ||
3335 CallConv == CallingConv::X86_ThisCall)
3336 // fastcc functions can't have varargs.
3337 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3338 }
3339
3340 FuncInfo->setArgumentStackSize(StackSize);
3341
3342 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3343 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3344 if (Personality == EHPersonality::CoreCLR) {
3345 assert(Is64Bit)(static_cast <bool> (Is64Bit) ? void (0) : __assert_fail
("Is64Bit", "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3345, __extension__ __PRETTY_FUNCTION__))
;
3346 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3347 // that we'd prefer this slot be allocated towards the bottom of the frame
3348 // (i.e. near the stack pointer after allocating the frame). Every
3349 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3350 // offset from the bottom of this and each funclet's frame must be the
3351 // same, so the size of funclets' (mostly empty) frames is dictated by
3352 // how far this slot is from the bottom (since they allocate just enough
3353 // space to accommodate holding this slot at the correct offset).
3354 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3355 EHInfo->PSPSymFrameIdx = PSPSymFI;
3356 }
3357 }
3358
3359 if (CallConv == CallingConv::X86_RegCall ||
3360 F.hasFnAttribute("no_caller_saved_registers")) {
3361 MachineRegisterInfo &MRI = MF.getRegInfo();
3362 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3363 MRI.disableCalleeSavedRegister(Pair.first);
3364 }
3365
3366 return Chain;
3367}
3368
3369SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3370 SDValue Arg, const SDLoc &dl,
3371 SelectionDAG &DAG,
3372 const CCValAssign &VA,
3373 ISD::ArgFlagsTy Flags) const {
3374 unsigned LocMemOffset = VA.getLocMemOffset();
3375 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3376 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3377 StackPtr, PtrOff);
3378 if (Flags.isByVal())
3379 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3380
3381 return DAG.getStore(
3382 Chain, dl, Arg, PtrOff,
3383 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3384}
3385
3386/// Emit a load of return address if tail call
3387/// optimization is performed and it is required.
3388SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3389 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3390 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3391 // Adjust the Return address stack slot.
3392 EVT VT = getPointerTy(DAG.getDataLayout());
3393 OutRetAddr = getReturnAddressFrameIndex(DAG);
3394
3395 // Load the "old" Return address.
3396 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3397 return SDValue(OutRetAddr.getNode(), 1);
3398}
3399
3400/// Emit a store of the return address if tail call
3401/// optimization is performed and it is required (FPDiff!=0).
3402static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3403 SDValue Chain, SDValue RetAddrFrIdx,
3404 EVT PtrVT, unsigned SlotSize,
3405 int FPDiff, const SDLoc &dl) {
3406 // Store the return address to the appropriate stack slot.
3407 if (!FPDiff) return Chain;
3408 // Calculate the new stack slot for the return address.
3409 int NewReturnAddrFI =
3410 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3411 false);
3412 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3413 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3414 MachinePointerInfo::getFixedStack(
3415 DAG.getMachineFunction(), NewReturnAddrFI));
3416 return Chain;
3417}
3418
3419/// Returns a vector_shuffle mask for an movs{s|d}, movd
3420/// operation of specified width.
3421static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3422 SDValue V2) {
3423 unsigned NumElems = VT.getVectorNumElements();
3424 SmallVector<int, 8> Mask;
3425 Mask.push_back(NumElems);
3426 for (unsigned i = 1; i != NumElems; ++i)
3427 Mask.push_back(i);
3428 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3429}
3430
/// Lower an outgoing call, described by CLI, into SelectionDAG nodes.
///
/// Visible steps, in order: decide whether the call stays a tail call (and
/// whether it is a sibcall), assign argument locations with CC_X86, open the
/// call sequence, place arguments in registers or stack slots, rewrite the
/// callee operand, build the operand list including the register mask, and
/// emit either X86ISD::TC_RETURN (tail calls) or X86ISD::CALL / X86ISD::NT_CALL
/// followed by CALLSEQ_END.
///
/// Calls using the X86_INTR convention and misuse of inalloca are rejected
/// through report_fatal_error.
///
/// \param CLI    Call description; CLI.IsTailCall is bound by reference and may
///               be overwritten here.
/// \param InVals Forwarded to LowerCallResult on the non-tail-call path.
/// \returns the TC_RETURN node for tail calls, otherwise the value returned by
///          LowerCallResult.
3431SDValue
3432X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3433 SmallVectorImpl<SDValue> &InVals) const {
3434 SelectionDAG &DAG = CLI.DAG;
3435 SDLoc &dl = CLI.DL;
3436 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3437 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3438 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3439 SDValue Chain = CLI.Chain;
3440 SDValue Callee = CLI.Callee;
3441 CallingConv::ID CallConv = CLI.CallConv;
3442 bool &isTailCall = CLI.IsTailCall;
3443 bool isVarArg = CLI.IsVarArg;
3444
3445 MachineFunction &MF = DAG.getMachineFunction();
3446 bool Is64Bit = Subtarget.is64Bit();
3447 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3448 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3449 bool IsSibcall = false;
3450 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
// The queries below feed later decisions: Attr can veto tail calls, HasNCSR
// picks the preserved-register mask, and HasNoCfCheck together with the
// cf-protection-branch module flag selects NT_CALL over CALL.
3451 auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
3452 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3453 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3454 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3455 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3456 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
3457 bool HasNoCfCheck =
3458 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3459 const Module *M = MF.getMMI().getModule();
3460 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3461
3462 if (CallConv == CallingConv::X86_INTR)
3463 report_fatal_error("X86 interrupts may not be called directly");
3464
3465 if (Attr.getValueAsString() == "true")
3466 isTailCall = false;
3467
3468 if (Subtarget.isPICStyleGOT() &&
3469 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3470 // If we are using a GOT, disable tail calls to external symbols with
3471 // default visibility. Tail calling such a symbol requires using a GOT
3472 // relocation, which forces early binding of the symbol. This breaks code
3473 // that require lazy function symbol resolution. Using musttail or
3474 // GuaranteedTailCallOpt will override this.
3475 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3476 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3477 G->getGlobal()->hasDefaultVisibility()))
3478 isTailCall = false;
3479 }
3480
3481 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3482 if (IsMustTail) {
3483 // Force this to be a tail call. The verifier rules are enough to ensure
3484 // that we can lower this successfully without moving the return address
3485 // around.
3486 isTailCall = true;
3487 } else if (isTailCall) {
3488 // Check if it's really possible to do a tail call.
3489 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3490 isVarArg, SR != NotStructReturn,
3491 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3492 Outs, OutVals, Ins, DAG);
3493
3494 // Sibcalls are automatically detected tailcalls which do not require
3495 // ABI changes.
3496 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3497 IsSibcall = true;
3498
3499 if (isTailCall)
3500 ++NumTailCalls;
3501 }
3502
3503 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3504, __extension__ __PRETTY_FUNCTION__))
3504 "Var args not supported with calling convention fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3504, __extension__ __PRETTY_FUNCTION__))
;
3505
3506 // Analyze operands of the call, assigning locations to each operand.
3507 SmallVector<CCValAssign, 16> ArgLocs;
3508 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3509
3510 // Allocate shadow area for Win64.
3511 if (IsWin64)
3512 CCInfo.AllocateStack(32, 8);
3513
3514 CCInfo.AnalyzeArguments(Outs, CC_X86);
3515
3516 // In vectorcall calling convention a second pass is required for the HVA
3517 // types.
3518 if (CallingConv::X86_VectorCall == CallConv) {
3519 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3520 }
3521
3522 // Get a count of how many bytes are to be pushed on the stack.
3523 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3524 if (IsSibcall)
3525 // This is a sibcall. The memory operands are available in caller's
3526 // own caller's stack.
3527 NumBytes = 0;
3528 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3529 canGuaranteeTCO(CallConv))
3530 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3531
// FPDiff is only computed for tail calls that are neither sibcalls nor
// musttail calls; in every other case it keeps the value 0.
3532 int FPDiff = 0;
3533 if (isTailCall && !IsSibcall && !IsMustTail) {
3534 // Lower arguments at fp - stackoffset + fpdiff.
3535 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3536
3537 FPDiff = NumBytesCallerPushed - NumBytes;
3538
3539 // Set the delta of movement of the returnaddr stackslot.
3540 // But only set if delta is greater than previous delta.
3541 if (FPDiff < X86Info->getTCReturnAddrDelta())
3542 X86Info->setTCReturnAddrDelta(FPDiff);
3543 }
3544
3545 unsigned NumBytesToPush = NumBytes;
3546 unsigned NumBytesToPop = NumBytes;
3547
3548 // If we have an inalloca argument, all stack space has already been allocated
3549 // for us and be right at the top of the stack. We don't support multiple
3550 // arguments passed in memory when using inalloca.
3551 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3552 NumBytesToPush = 0;
3553 if (!ArgLocs.back().isMemLoc())
3554 report_fatal_error("cannot use inalloca attribute on a register "
3555 "parameter");
3556 if (ArgLocs.back().getLocMemOffset() != 0)
3557 report_fatal_error("any parameter with the inalloca attribute must be "
3558 "the only memory argument");
3559 }
3560
3561 if (!IsSibcall)
3562 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3563 NumBytes - NumBytesToPush, dl);
3564
// RetAddrFrIdx stays default-constructed unless the load below runs
// (isTailCall && FPDiff). It is later handed to EmitTailCallStoreRetAddr.
3565 SDValue RetAddrFrIdx;
3566 // Load return address for tail calls.
3567 if (isTailCall && FPDiff)
3568 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3569 Is64Bit, FPDiff, dl);
3570
3571 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3572 SmallVector<SDValue, 8> MemOpChains;
3573 SDValue StackPtr;
3574
3575 // The next loop assumes that the locations are in the same order of the
3576 // input arguments.
3577 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3578, __extension__ __PRETTY_FUNCTION__))
3578 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3578, __extension__ __PRETTY_FUNCTION__))
;
3579
3580 // Walk the register/memloc assignments, inserting copies/loads. In the case
3581 // of tail call optimization arguments are handle later.
3582 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3583 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3584 ++I, ++OutIndex) {
3585 assert(OutIndex < Outs.size() && "Invalid Out index")(static_cast <bool> (OutIndex < Outs.size() &&
"Invalid Out index") ? void (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3585, __extension__ __PRETTY_FUNCTION__))
;
3586 // Skip inalloca arguments, they have already been written.
3587 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3588 if (Flags.isInAlloca())
3589 continue;
3590
3591 CCValAssign &VA = ArgLocs[I];
3592 EVT RegVT = VA.getLocVT();
3593 SDValue Arg = OutVals[OutIndex];
3594 bool isByVal = Flags.isByVal();
3595
3596 // Promote the value if needed.
3597 switch (VA.getLocInfo()) {
3598 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3598)
;
3599 case CCValAssign::Full: break;
3600 case CCValAssign::SExt:
3601 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3602 break;
3603 case CCValAssign::ZExt:
3604 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3605 break;
3606 case CCValAssign::AExt:
3607 if (Arg.getValueType().isVector() &&
3608 Arg.getValueType().getVectorElementType() == MVT::i1)
3609 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3610 else if (RegVT.is128BitVector()) {
3611 // Special case: passing MMX values in XMM registers.
3612 Arg = DAG.getBitcast(MVT::i64, Arg);
3613 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3614 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3615 } else
3616 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3617 break;
3618 case CCValAssign::BCvt:
3619 Arg = DAG.getBitcast(RegVT, Arg);
3620 break;
3621 case CCValAssign::Indirect: {
3622 // Store the argument.
3623 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3624 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3625 Chain = DAG.getStore(
3626 Chain, dl, Arg, SpillSlot,
3627 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3628 Arg = SpillSlot;
3629 break;
3630 }
3631 }
3632
3633 if (VA.needsCustom()) {
3634 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3635, __extension__ __PRETTY_FUNCTION__))
3635 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3635, __extension__ __PRETTY_FUNCTION__))
;
3636 // Split v64i1 value into two registers
// ArgLocs[++I] consumes the second location of the split value here, so I
// advances twice for this argument while OutIndex advances only once.
3637 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3638 Subtarget);
3639 } else if (VA.isRegLoc()) {
3640 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3641 if (isVarArg && IsWin64) {
3642 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3643 // shadow reg if callee is a varargs function.
3644 unsigned ShadowReg = 0;
3645 switch (VA.getLocReg()) {
3646 case X86::XMM0: ShadowReg = X86::RCX; break;
3647 case X86::XMM1: ShadowReg = X86::RDX; break;
3648 case X86::XMM2: ShadowReg = X86::R8; break;
3649 case X86::XMM3: ShadowReg = X86::R9; break;
3650 }
3651 if (ShadowReg)
3652 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3653 }
3654 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3655 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3655, __extension__ __PRETTY_FUNCTION__))
;
3656 if (!StackPtr.getNode())
3657 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3658 getPointerTy(DAG.getDataLayout()));
3659 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3660 dl, DAG, VA, Flags));
3661 }
3662 }
3663
3664 if (!MemOpChains.empty())
3665 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3666
3667 if (Subtarget.isPICStyleGOT()) {
3668 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3669 // GOT pointer.
3670 if (!isTailCall) {
3671 RegsToPass.push_back(std::make_pair(
3672 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3673 getPointerTy(DAG.getDataLayout()))));
3674 } else {
3675 // If we are tail calling and generating PIC/GOT style code load the
3676 // address of the callee into ECX. The value in ecx is used as target of
3677 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3678 // for tail calls on PIC/GOT architectures. Normally we would just put the
3679 // address of GOT into ebx and then call target@PLT. But for tail calls
3680 // ebx would be restored (since ebx is callee saved) before jumping to the
3681 // target@PLT.
3682
3683 // Note: The actual moving to ECX is done further down.
3684 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3685 if (G && !G->getGlobal()->hasLocalLinkage() &&
3686 G->getGlobal()->hasDefaultVisibility())
3687 Callee = LowerGlobalAddress(Callee, DAG);
3688 else if (isa<ExternalSymbolSDNode>(Callee))
3689 Callee = LowerExternalSymbol(Callee, DAG);
3690 }
3691 }
3692
3693 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3694 // From AMD64 ABI document:
3695 // For calls that may call functions that use varargs or stdargs
3696 // (prototype-less calls or calls to functions containing ellipsis (...) in
3697 // the declaration) %al is used as hidden argument to specify the number
3698 // of SSE registers used. The contents of %al do not need to match exactly
3699 // the number of registers, but must be an ubound on the number of SSE
3700 // registers used and is in the range 0 - 8 inclusive.
3701
3702 // Count the number of XMM registers allocated.
3703 static const MCPhysReg XMMArgRegs[] = {
3704 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3705 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3706 };
3707 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3708 assert((Subtarget.hasSSE1() || !NumXMMRegs)(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3709, __extension__ __PRETTY_FUNCTION__))
3709 && "SSE registers cannot be used when SSE is disabled")(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3709, __extension__ __PRETTY_FUNCTION__))
;
3710
3711 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3712 DAG.getConstant(NumXMMRegs, dl,
3713 MVT::i8)));
3714 }
3715
3716 if (isVarArg && IsMustTail) {
3717 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3718 for (const auto &F : Forwards) {
3719 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3720 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3721 }
3722 }
3723
3724 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3725 // don't need this because the eligibility check rejects calls that require
3726 // shuffling arguments passed in memory.
3727 if (!IsSibcall && isTailCall) {
3728 // Force all the incoming stack arguments to be loaded from the stack
3729 // before any new outgoing arguments are stored to the stack, because the
3730 // outgoing stack slots may alias the incoming argument stack slots, and
3731 // the alias isn't otherwise explicit. This is slightly more conservative
3732 // than necessary, because it means that each store effectively depends
3733 // on every argument instead of just those arguments it would clobber.
3734 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3735
3736 SmallVector<SDValue, 8> MemOpChains2;
3737 SDValue FIN;
3738 int FI = 0;
3739 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3740 ++I, ++OutsIndex) {
3741 CCValAssign &VA = ArgLocs[I];
3742
3743 if (VA.isRegLoc()) {
3744 if (VA.needsCustom()) {
3745 assert((CallConv == CallingConv::X86_RegCall) &&(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3746, __extension__ __PRETTY_FUNCTION__))
3746 "Expecting custom case only in regcall calling convention")(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3746, __extension__ __PRETTY_FUNCTION__))
;
3747 // This means that we are in special case where one argument was
3748 // passed through two register locations - Skip the next location
3749 ++I;
3750 }
3751
3752 continue;
3753 }
3754
3755 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3755, __extension__ __PRETTY_FUNCTION__))
;
3756 SDValue Arg = OutVals[OutsIndex];
3757 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3758 // Skip inalloca arguments. They don't require any work.
3759 if (Flags.isInAlloca())
3760 continue;
3761 // Create frame index.
3762 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3763 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3764 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3765 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3766
3767 if (Flags.isByVal()) {
3768 // Copy relative to framepointer.
3769 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3770 if (!StackPtr.getNode())
3771 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3772 getPointerTy(DAG.getDataLayout()));
3773 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3774 StackPtr, Source);
3775
3776 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3777 ArgChain,
3778 Flags, DAG, dl));
3779 } else {
3780 // Store relative to framepointer.
3781 MemOpChains2.push_back(DAG.getStore(
3782 ArgChain, dl, Arg, FIN,
3783 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3784 }
3785 }
3786
3787 if (!MemOpChains2.empty())
3788 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3789
3790 // Store the return address to the appropriate stack slot.
3791 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3792 getPointerTy(DAG.getDataLayout()),
3793 RegInfo->getSlotSize(), FPDiff, dl);
3794 }
3795
3796 // Build a sequence of copy-to-reg nodes chained together with token chain
3797 // and flag operands which copy the outgoing args into registers.
3798 SDValue InFlag;
3799 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3800 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3801 RegsToPass[i].second, InFlag);
3802 InFlag = Chain.getValue(1);
3803 }
3804
3805 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3806 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")(static_cast <bool> (Is64Bit && "Large code model is only legal in 64-bit mode."
) ? void (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3806, __extension__ __PRETTY_FUNCTION__))
;
3807 // In the 64-bit large code model, we have to make all calls
3808 // through a register, since the call instruction's 32-bit
3809 // pc-relative offset may not be large enough to hold the whole
3810 // address.
3811 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3812 // If the callee is a GlobalAddress node (quite common, every direct call
3813 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3814 // it.
3815 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3816
3817 // We should use extra load for direct calls to dllimported functions in
3818 // non-JIT mode.
3819 const GlobalValue *GV = G->getGlobal();
3820 if (!GV->hasDLLImportStorageClass()) {
3821 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
3822
3823 Callee = DAG.getTargetGlobalAddress(
3824 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3825
3826 if (OpFlags == X86II::MO_GOTPCREL) {
3827 // Add a wrapper.
3828 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3829 getPointerTy(DAG.getDataLayout()), Callee);
3830 // Add extra indirection
3831 Callee = DAG.getLoad(
3832 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3833 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3834 }
3835 }
3836 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3837 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
3838 unsigned char OpFlags =
3839 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
3840
3841 Callee = DAG.getTargetExternalSymbol(
3842 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3843
3844 if (OpFlags == X86II::MO_GOTPCREL) {
3845 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3846 getPointerTy(DAG.getDataLayout()), Callee);
3847 Callee = DAG.getLoad(
3848 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3849 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3850 }
3851 } else if (Subtarget.isTarget64BitILP32() &&
3852 Callee->getValueType(0) == MVT::i32) {
3853 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3854 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3855 }
3856
3857 // Returns a chain & a flag for retval copy to use.
3858 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3859 SmallVector<SDValue, 8> Ops;
3860
3861 if (!IsSibcall && isTailCall) {
3862 Chain = DAG.getCALLSEQ_END(Chain,
3863 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3864 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3865 InFlag = Chain.getValue(1);
3866 }
3867
3868 Ops.push_back(Chain);
3869 Ops.push_back(Callee);
3870
3871 if (isTailCall)
3872 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3873
3874 // Add argument registers to the end of the list so that they are known live
3875 // into the call.
3876 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3877 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3878 RegsToPass[i].second.getValueType()));
3879
3880 // Add a register mask operand representing the call-preserved registers.
3881 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
3882 // set X86_INTR calling convention because it has the same CSR mask
3883 // (same preserved registers).
3884 const uint32_t *Mask = RegInfo->getCallPreservedMask(
3885 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
3886 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 3886, __extension__ __PRETTY_FUNCTION__))
;
3887
3888 // If this is an invoke in a 32-bit function using a funclet-based
3889 // personality, assume the function clobbers all registers. If an exception
3890 // is thrown, the runtime will not restore CSRs.
3891 // FIXME: Model this more precisely so that we can register allocate across
3892 // the normal edge and spill and fill across the exceptional edge.
3893 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
3894 const Function &CallerFn = MF.getFunction();
3895 EHPersonality Pers =
3896 CallerFn.hasPersonalityFn()
3897 ? classifyEHPersonality(CallerFn.getPersonalityFn())
3898 : EHPersonality::Unknown;
3899 if (isFuncletEHPersonality(Pers))
3900 Mask = RegInfo->getNoPreservedMask();
3901 }
3902
3903 // Define a new register mask from the existing mask.
// RegMask stays null unless the branch below allocates a private copy; the
// pointer is handed to LowerCallResult at the end of the function.
3904 uint32_t *RegMask = nullptr;
3905
3906 // In some calling conventions we need to remove the used physical registers
3907 // from the reg mask.
3908 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
3909 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3910
3911 // Allocate a new Reg Mask and copy Mask.
3912 RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
3913 unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
3914 memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
3915
3916 // Make sure all sub registers of the argument registers are reset
3917 // in the RegMask.
3918 for (auto const &RegPair : RegsToPass)
3919 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
3920 SubRegs.isValid(); ++SubRegs)
3921 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3922
3923 // Create the RegMask Operand according to our updated mask.
3924 Ops.push_back(DAG.getRegisterMask(RegMask));
3925 } else {
3926 // Create the RegMask Operand according to the static mask.
3927 Ops.push_back(DAG.getRegisterMask(Mask));
3928 }
3929
3930 if (InFlag.getNode())
3931 Ops.push_back(InFlag);
3932
3933 if (isTailCall) {
3934 // We used to do:
3935 //// If this is the first return lowered for this function, add the regs
3936 //// to the liveout set for the function.
3937 // This isn't right, although it's probably harmless on x86; liveouts
3938 // should be computed from returns not tail calls. Consider a void
3939 // function making a tail call to a function returning int.
3940 MF.getFrameInfo().setHasTailCall();
3941 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
3942 }
3943
3944 if (HasNoCfCheck && IsCFProtectionSupported) {
3945 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
3946 } else {
3947 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3948 }
3949 InFlag = Chain.getValue(1);
3950
3951 // Create the CALLSEQ_END node.
3952 unsigned NumBytesForCalleeToPop;
3953 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3954 DAG.getTarget().Options.GuaranteedTailCallOpt))
3955 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3956 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3957 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3958 SR == StackStructReturn)
3959 // If this is a call to a struct-return function, the callee
3960 // pops the hidden struct pointer, so we have to push it back.
3961 // This is common for Darwin/X86, Linux & Mingw32 targets.
3962 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3963 NumBytesForCalleeToPop = 4;
3964 else
3965 NumBytesForCalleeToPop = 0; // Callee pops nothing.
3966
3967 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
3968 // No need to reset the stack after the call if the call doesn't return. To
3969 // make the MI verify, we'll pretend the callee does it for us.
3970 NumBytesForCalleeToPop = NumBytes;
3971 }
3972
3973 // Returns a flag for retval copy to use.
3974 if (!IsSibcall) {
3975 Chain = DAG.getCALLSEQ_END(Chain,
3976 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3977 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
3978 true),
3979 InFlag, dl);
3980 InFlag = Chain.getValue(1);
3981 }
3982
3983 // Handle result values, copying them out of physregs into vregs that we
3984 // return.
3985 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
3986 InVals, RegMask);
3987}
3988
3989//===----------------------------------------------------------------------===//
3990// Fast Calling Convention (tail call) implementation
3991//===----------------------------------------------------------------------===//
3992
3993// Like the stdcall convention (callee cleans up the arguments), except that
3994// ECX is reserved for storing the tail called function address. Only 2
3995// registers are free for argument passing (inreg). Tail call optimization is
3996// performed provided:
3997// * tailcallopt is enabled
3998// * caller/callee are fastcc
3999// On X86_64 architecture with GOT-style position independent code only local
4000// (within module) calls are supported at the moment.
4001// To keep the stack aligned according to platform abi the function
4002// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4003// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
4004// If a tail called function callee has more arguments than the caller the
4005// caller needs to make sure that there is room to move the RETADDR to. This is
4006// achieved by reserving an area the size of the argument delta right after the
4007// original RETADDR, but before the saved framepointer or the spilled registers
4008// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4009// stack layout:
4010// arg1
4011// arg2
4012// RETADDR
4013// [ new RETADDR
4014// move area ]
4015// (possible EBP)
4016// ESI
4017// EDI
4018// local1 ..
4019
4020/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4021/// requirement.
4022unsigned
4023X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
4024 SelectionDAG& DAG) const {
4025 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4026 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
4027 unsigned StackAlignment = TFI.getStackAlignment();
4028 uint64_t AlignMask = StackAlignment - 1;
4029 int64_t Offset = StackSize;
4030 unsigned SlotSize = RegInfo->getSlotSize();
4031 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
4032 // Number smaller than 12 so just add the difference.
4033 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
4034 } else {
4035 // Mask out lower bits, add stackalignment once plus the 12 bytes.
4036 Offset = ((~AlignMask) & Offset) + StackAlignment +
4037 (StackAlignment-SlotSize);
4038 }
4039 return Offset;
4040}
4041
4042/// Return true if the given stack call argument is already available in the
4043/// same position (relatively) of the caller's incoming argument stack.
4044static
4045bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4046 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4047 const X86InstrInfo *TII, const CCValAssign &VA) {
4048 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4049
4050 for (;;) {
4051 // Look through nodes that don't alter the bits of the incoming value.
4052 unsigned Op = Arg.getOpcode();
4053 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4054 Arg = Arg.getOperand(0);
4055 continue;
4056 }
4057 if (Op == ISD::TRUNCATE) {
4058 const SDValue &TruncInput = Arg.getOperand(0);
4059 if (TruncInput.getOpcode() == ISD::AssertZext &&
4060 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4061 Arg.getValueType()) {
4062 Arg = TruncInput.getOperand(0);
4063 continue;
4064 }
4065 }
4066 break;
4067 }
4068
4069 int FI = INT_MAX2147483647;
4070 if (Arg.getOpcode() == ISD::CopyFromReg) {
4071 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4072 if (!TargetRegisterInfo::isVirtualRegister(VR))
4073 return false;
4074 MachineInstr *Def = MRI->getVRegDef(VR);
4075 if (!Def)
4076 return false;
4077 if (!Flags.isByVal()) {
4078 if (!TII->isLoadFromStackSlot(*Def, FI))
4079 return false;
4080 } else {
4081 unsigned Opcode = Def->getOpcode();
4082 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4083 Opcode == X86::LEA64_32r) &&
4084 Def->getOperand(1).isFI()) {
4085 FI = Def->getOperand(1).getIndex();
4086 Bytes = Flags.getByValSize();
4087 } else
4088 return false;
4089 }
4090 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4091 if (Flags.isByVal())
4092 // ByVal argument is passed in as a pointer but it's now being
4093 // dereferenced. e.g.
4094 // define @foo(%struct.X* %A) {
4095 // tail call @bar(%struct.X* byval %A)
4096 // }
4097 return false;
4098 SDValue Ptr = Ld->getBasePtr();
4099 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4100 if (!FINode)
4101 return false;
4102 FI = FINode->getIndex();
4103 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4104 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4105 FI = FINode->getIndex();
4106 Bytes = Flags.getByValSize();
4107 } else
4108 return false;
4109
4110 assert(FI != INT_MAX)(static_cast <bool> (FI != 2147483647) ? void (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 4110, __extension__ __PRETTY_FUNCTION__))
;
4111 if (!MFI.isFixedObjectIndex(FI))
4112 return false;
4113
4114 if (Offset != MFI.getObjectOffset(FI))
4115 return false;
4116
4117 // If this is not byval, check that the argument stack object is immutable.
4118 // inalloca and argument copy elision can create mutable argument stack
4119 // objects. Byval objects can be mutated, but a byval call intends to pass the
4120 // mutated memory.
4121 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4122 return false;
4123
4124 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4125 // If the argument location is wider than the argument type, check that any
4126 // extension flags match.
4127 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4128 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4129 return false;
4130 }
4131 }
4132
4133 return Bytes == MFI.getObjectSize(FI);
4134}
4135
4136/// Check whether the call is eligible for tail call optimization. Targets
4137/// that want to do tail call optimization should implement this function.
4138bool X86TargetLowering::IsEligibleForTailCallOptimization(
4139 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4140 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4141 const SmallVectorImpl<ISD::OutputArg> &Outs,
4142 const SmallVectorImpl<SDValue> &OutVals,
4143 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4144 if (!mayTailCallThisCC(CalleeCC))
4145 return false;
4146
4147 // If -tailcallopt is specified, make fastcc functions tail-callable.
4148 MachineFunction &MF = DAG.getMachineFunction();
4149 const Function &CallerF = MF.getFunction();
4150
4151 // If the function return type is x86_fp80 and the callee return type is not,
4152 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4153 // perform a tailcall optimization here.
4154 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4155 return false;
4156
4157 CallingConv::ID CallerCC = CallerF.getCallingConv();
4158 bool CCMatch = CallerCC == CalleeCC;
4159 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4160 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4161
4162 // Win64 functions have extra shadow space for argument homing. Don't do the
4163 // sibcall if the caller and callee have mismatched expectations for this
4164 // space.
4165 if (IsCalleeWin64 != IsCallerWin64)
4166 return false;
4167
4168 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4169 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4170 return true;
4171 return false;
4172 }
4173
4174 // Look for obvious safe cases to perform tail call optimization that do not
4175 // require ABI changes. This is what gcc calls sibcall.
4176
4177 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4178 // emit a special epilogue.
4179 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4180 if (RegInfo->needsStackRealignment(MF))
4181 return false;
4182
4183 // Also avoid sibcall optimization if either caller or callee uses struct
4184 // return semantics.
4185 if (isCalleeStructRet || isCallerStructRet)
4186 return false;
4187
4188 // Do not sibcall optimize vararg calls unless all arguments are passed via
4189 // registers.
4190 LLVMContext &C = *DAG.getContext();
4191 if (isVarArg && !Outs.empty()) {
4192 // Optimizing for varargs on Win64 is unlikely to be safe without
4193 // additional testing.
4194 if (IsCalleeWin64 || IsCallerWin64)
4195 return false;
4196
4197 SmallVector<CCValAssign, 16> ArgLocs;
4198 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4199
4200 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4201 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4202 if (!ArgLocs[i].isRegLoc())
4203 return false;
4204 }
4205
4206 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4207 // stack. Therefore, if it's not used by the call it is not safe to optimize
4208 // this into a sibcall.
4209 bool Unused = false;
4210 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4211 if (!Ins[i].Used) {
4212 Unused = true;
4213 break;
4214 }
4215 }
4216 if (Unused) {
4217 SmallVector<CCValAssign, 16> RVLocs;
4218 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4219 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4220 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4221 CCValAssign &VA = RVLocs[i];
4222 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4223 return false;
4224 }
4225 }
4226
4227 // Check that the call results are passed in the same way.
4228 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4229 RetCC_X86, RetCC_X86))
4230 return false;
4231 // The callee has to preserve all registers the caller needs to preserve.
4232 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4233 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4234 if (!CCMatch) {
4235 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4236 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4237 return false;
4238 }
4239
4240 unsigned StackArgsSize = 0;
4241
4242 // If the callee takes no arguments then go on to check the results of the
4243 // call.
4244 if (!Outs.empty()) {
4245 // Check if stack adjustment is needed. For now, do not do this if any
4246 // argument is passed on the stack.
4247 SmallVector<CCValAssign, 16> ArgLocs;
4248 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4249
4250 // Allocate shadow area for Win64
4251 if (IsCalleeWin64)
4252 CCInfo.AllocateStack(32, 8);
4253
4254 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4255 StackArgsSize = CCInfo.getNextStackOffset();
4256
4257 if (CCInfo.getNextStackOffset()) {
4258 // Check if the arguments are already laid out in the right way as
4259 // the caller's fixed stack objects.
4260 MachineFrameInfo &MFI = MF.getFrameInfo();
4261 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4262 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4263 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4264 CCValAssign &VA = ArgLocs[i];
4265 SDValue Arg = OutVals[i];
4266 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4267 if (VA.getLocInfo() == CCValAssign::Indirect)
4268 return false;
4269 if (!VA.isRegLoc()) {
4270 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4271 MFI, MRI, TII, VA))
4272 return false;
4273 }
4274 }
4275 }
4276
4277 bool PositionIndependent = isPositionIndependent();
4278 // If the tailcall address may be in a register, then make sure it's
4279 // possible to register allocate for it. In 32-bit, the call address can
4280 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4281 // callee-saved registers are restored. These happen to be the same
4282 // registers used to pass 'inreg' arguments so watch out for those.
4283 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4284 !isa<ExternalSymbolSDNode>(Callee)) ||
4285 PositionIndependent)) {
4286 unsigned NumInRegs = 0;
4287 // In PIC we need an extra register to formulate the address computation
4288 // for the callee.
4289 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4290
4291 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4292 CCValAssign &VA = ArgLocs[i];
4293 if (!VA.isRegLoc())
4294 continue;
4295 unsigned Reg = VA.getLocReg();
4296 switch (Reg) {
4297 default: break;
4298 case X86::EAX: case X86::EDX: case X86::ECX:
4299 if (++NumInRegs == MaxInRegs)
4300 return false;
4301 break;
4302 }
4303 }
4304 }
4305
4306 const MachineRegisterInfo &MRI = MF.getRegInfo();
4307 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4308 return false;
4309 }
4310
4311 bool CalleeWillPop =
4312 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4313 MF.getTarget().Options.GuaranteedTailCallOpt);
4314
4315 if (unsigned BytesToPop =
4316 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4317 // If we have bytes to pop, the callee must pop them.
4318 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4319 if (!CalleePopMatches)
4320 return false;
4321 } else if (CalleeWillPop && StackArgsSize > 0) {
4322 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4323 return false;
4324 }
4325
4326 return true;
4327}
4328
4329FastISel *
4330X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4331 const TargetLibraryInfo *libInfo) const {
4332 return X86::createFastISel(funcInfo, libInfo);
4333}
4334
4335//===----------------------------------------------------------------------===//
4336// Other Lowering Hooks
4337//===----------------------------------------------------------------------===//
4338
4339static bool MayFoldLoad(SDValue Op) {
4340 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4341}
4342
4343static bool MayFoldIntoStore(SDValue Op) {
4344 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4345}
4346
4347static bool MayFoldIntoZeroExtend(SDValue Op) {
4348 if (Op.hasOneUse()) {
4349 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4350 return (ISD::ZERO_EXTEND == Opcode);
4351 }
4352 return false;
4353}
4354
4355static bool isTargetShuffle(unsigned Opcode) {
4356 switch(Opcode) {
4357 default: return false;
4358 case X86ISD::BLENDI:
4359 case X86ISD::PSHUFB:
4360 case X86ISD::PSHUFD:
4361 case X86ISD::PSHUFHW:
4362 case X86ISD::PSHUFLW:
4363 case X86ISD::SHUFP:
4364 case X86ISD::INSERTPS:
4365 case X86ISD::EXTRQI:
4366 case X86ISD::INSERTQI:
4367 case X86ISD::PALIGNR:
4368 case X86ISD::VSHLDQ:
4369 case X86ISD::VSRLDQ:
4370 case X86ISD::MOVLHPS:
4371 case X86ISD::MOVHLPS:
4372 case X86ISD::MOVSHDUP:
4373 case X86ISD::MOVSLDUP:
4374 case X86ISD::MOVDDUP:
4375 case X86ISD::MOVSS:
4376 case X86ISD::MOVSD:
4377 case X86ISD::UNPCKL:
4378 case X86ISD::UNPCKH:
4379 case X86ISD::VBROADCAST:
4380 case X86ISD::VPERMILPI:
4381 case X86ISD::VPERMILPV:
4382 case X86ISD::VPERM2X128:
4383 case X86ISD::SHUF128:
4384 case X86ISD::VPERMIL2:
4385 case X86ISD::VPERMI:
4386 case X86ISD::VPPERM:
4387 case X86ISD::VPERMV:
4388 case X86ISD::VPERMV3:
4389 case X86ISD::VZEXT_MOVL:
4390 return true;
4391 }
4392}
4393
4394static bool isTargetShuffleVariableMask(unsigned Opcode) {
4395 switch (Opcode) {
4396 default: return false;
4397 // Target Shuffles.
4398 case X86ISD::PSHUFB:
4399 case X86ISD::VPERMILPV:
4400 case X86ISD::VPERMIL2:
4401 case X86ISD::VPPERM:
4402 case X86ISD::VPERMV:
4403 case X86ISD::VPERMV3:
4404 return true;
4405 // 'Faux' Target Shuffles.
4406 case ISD::AND:
4407 case X86ISD::ANDNP:
4408 return true;
4409 }
4410}
4411
4412SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4413 MachineFunction &MF = DAG.getMachineFunction();
4414 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4415 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4416 int ReturnAddrIndex = FuncInfo->getRAIndex();
4417
4418 if (ReturnAddrIndex == 0) {
4419 // Set up a frame object for the return address.
4420 unsigned SlotSize = RegInfo->getSlotSize();
4421 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4422 -(int64_t)SlotSize,
4423 false);
4424 FuncInfo->setRAIndex(ReturnAddrIndex);
4425 }
4426
4427 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4428}
4429
4430bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4431 bool hasSymbolicDisplacement) {
4432 // Offset should fit into 32 bit immediate field.
4433 if (!isInt<32>(Offset))
4434 return false;
4435
4436 // If we don't have a symbolic displacement - we don't have any extra
4437 // restrictions.
4438 if (!hasSymbolicDisplacement)
4439 return true;
4440
4441 // FIXME: Some tweaks might be needed for medium code model.
4442 if (M != CodeModel::Small && M != CodeModel::Kernel)
4443 return false;
4444
4445 // For small code model we assume that latest object is 16MB before end of 31
4446 // bits boundary. We may also accept pretty large negative constants knowing
4447 // that all objects are in the positive half of address space.
4448 if (M == CodeModel::Small && Offset < 16*1024*1024)
4449 return true;
4450
4451 // For kernel code model we know that all object resist in the negative half
4452 // of 32bits address space. We may not accept negative offsets, since they may
4453 // be just off and we may accept pretty large positive ones.
4454 if (M == CodeModel::Kernel && Offset >= 0)
4455 return true;
4456
4457 return false;
4458}
4459
4460/// Determines whether the callee is required to pop its own arguments.
4461/// Callee pop is necessary to support tail calls.
4462bool X86::isCalleePop(CallingConv::ID CallingConv,
4463 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4464 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4465 // can guarantee TCO.
4466 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4467 return true;
4468
4469 switch (CallingConv) {
4470 default:
4471 return false;
4472 case CallingConv::X86_StdCall:
4473 case CallingConv::X86_FastCall:
4474 case CallingConv::X86_ThisCall:
4475 case CallingConv::X86_VectorCall:
4476 return !is64Bit;
4477 }
4478}
4479
4480/// Return true if the condition is an unsigned comparison operation.
4481static bool isX86CCUnsigned(unsigned X86CC) {
4482 switch (X86CC) {
4483 default:
4484 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 4484)
;
4485 case X86::COND_E:
4486 case X86::COND_NE:
4487 case X86::COND_B:
4488 case X86::COND_A:
4489 case X86::COND_BE:
4490 case X86::COND_AE:
4491 return true;
4492 case X86::COND_G:
4493 case X86::COND_GE:
4494 case X86::COND_L:
4495 case X86::COND_LE:
4496 return false;
4497 }
4498}
4499
4500static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4501 switch (SetCCOpcode) {
4502 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 4502)
;
4503 case ISD::SETEQ: return X86::COND_E;
4504 case ISD::SETGT: return X86::COND_G;
4505 case ISD::SETGE: return X86::COND_GE;
4506 case ISD::SETLT: return X86::COND_L;
4507 case ISD::SETLE: return X86::COND_LE;
4508 case ISD::SETNE: return X86::COND_NE;
4509 case ISD::SETULT: return X86::COND_B;
4510 case ISD::SETUGT: return X86::COND_A;
4511 case ISD::SETULE: return X86::COND_BE;
4512 case ISD::SETUGE: return X86::COND_AE;
4513 }
4514}
4515
4516/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4517/// condition code, returning the condition code and the LHS/RHS of the
4518/// comparison to make.
4519static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4520 bool isFP, SDValue &LHS, SDValue &RHS,
4521 SelectionDAG &DAG) {
4522 if (!isFP) {
4523 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4524 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4525 // X > -1 -> X == 0, jump !sign.
4526 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4527 return X86::COND_NS;
4528 }
4529 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4530 // X < 0 -> X == 0, jump on sign.
4531 return X86::COND_S;
4532 }
4533 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4534 // X < 1 -> X <= 0
4535 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4536 return X86::COND_LE;
4537 }
4538 }
4539
4540 return TranslateIntegerX86CC(SetCCOpcode);
4541 }
4542
4543 // First determine if it is required or is profitable to flip the operands.
4544
4545 // If LHS is a foldable load, but RHS is not, flip the condition.
4546 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4547 !ISD::isNON_EXTLoad(RHS.getNode())) {
4548 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4549 std::swap(LHS, RHS);
4550 }
4551
4552 switch (SetCCOpcode) {
4553 default: break;
4554 case ISD::SETOLT:
4555 case ISD::SETOLE:
4556 case ISD::SETUGT:
4557 case ISD::SETUGE:
4558 std::swap(LHS, RHS);
4559 break;
4560 }
4561
4562 // On a floating point condition, the flags are set as follows:
4563 // ZF PF CF op
4564 // 0 | 0 | 0 | X > Y
4565 // 0 | 0 | 1 | X < Y
4566 // 1 | 0 | 0 | X == Y
4567 // 1 | 1 | 1 | unordered
4568 switch (SetCCOpcode) {
4569 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 4569)
;
4570 case ISD::SETUEQ:
4571 case ISD::SETEQ: return X86::COND_E;
4572 case ISD::SETOLT: // flipped
4573 case ISD::SETOGT:
4574 case ISD::SETGT: return X86::COND_A;
4575 case ISD::SETOLE: // flipped
4576 case ISD::SETOGE:
4577 case ISD::SETGE: return X86::COND_AE;
4578 case ISD::SETUGT: // flipped
4579 case ISD::SETULT:
4580 case ISD::SETLT: return X86::COND_B;
4581 case ISD::SETUGE: // flipped
4582 case ISD::SETULE:
4583 case ISD::SETLE: return X86::COND_BE;
4584 case ISD::SETONE:
4585 case ISD::SETNE: return X86::COND_NE;
4586 case ISD::SETUO: return X86::COND_P;
4587 case ISD::SETO: return X86::COND_NP;
4588 case ISD::SETOEQ:
4589 case ISD::SETUNE: return X86::COND_INVALID;
4590 }
4591}
4592
4593/// Is there a floating point cmov for the specific X86 condition code?
4594/// Current x86 isa includes the following FP cmov instructions:
4595/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4596static bool hasFPCMov(unsigned X86CC) {
4597 switch (X86CC) {
4598 default:
4599 return false;
4600 case X86::COND_B:
4601 case X86::COND_BE:
4602 case X86::COND_E:
4603 case X86::COND_P:
4604 case X86::COND_A:
4605 case X86::COND_AE:
4606 case X86::COND_NE:
4607 case X86::COND_NP:
4608 return true;
4609 }
4610}
4611
4612
4613bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4614 const CallInst &I,
4615 MachineFunction &MF,
4616 unsigned Intrinsic) const {
4617
4618 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4619 if (!IntrData)
4620 return false;
4621
4622 Info.opc = ISD::INTRINSIC_W_CHAIN;
4623 Info.flags = MachineMemOperand::MONone;
4624 Info.offset = 0;
4625
4626 switch (IntrData->Type) {
4627 case TRUNCATE_TO_MEM_VI8:
4628 case TRUNCATE_TO_MEM_VI16:
4629 case TRUNCATE_TO_MEM_VI32: {
4630 Info.ptrVal = I.getArgOperand(0);
4631 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4632 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4633 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4634 ScalarVT = MVT::i8;
4635 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4636 ScalarVT = MVT::i16;
4637 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4638 ScalarVT = MVT::i32;
4639
4640 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4641 Info.align = 1;
4642 Info.flags |= MachineMemOperand::MOStore;
4643 break;
4644 }
4645 default:
4646 return false;
4647 }
4648
4649 return true;
4650}
4651
4652/// Returns true if the target can instruction select the
4653/// specified FP immediate natively. If false, the legalizer will
4654/// materialize the FP immediate as a load from a constant pool.
4655bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4656 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4657 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4658 return true;
4659 }
4660 return false;
4661}
4662
4663bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4664 ISD::LoadExtType ExtTy,
4665 EVT NewVT) const {
4666 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4667 // relocation target a movq or addq instruction: don't let the load shrink.
4668 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4669 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4670 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4671 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4672 return true;
4673}
4674
4675/// Returns true if it is beneficial to convert a load of a constant
4676/// to just the constant itself.
4677bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4678 Type *Ty) const {
4679 assert(Ty->isIntegerTy())(static_cast <bool> (Ty->isIntegerTy()) ? void (0) :
__assert_fail ("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 4679, __extension__ __PRETTY_FUNCTION__))
;
4680
4681 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4682 if (BitSize == 0 || BitSize > 64)
4683 return false;
4684 return true;
4685}
4686
4687bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4688 // TODO: It might be a win to ease or lift this restriction, but the generic
4689 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4690 if (VT.isVector() && Subtarget.hasAVX512())
4691 return false;
4692
4693 return true;
4694}
4695
4696bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4697 unsigned Index) const {
4698 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4699 return false;
4700
4701 // Mask vectors support all subregister combinations and operations that
4702 // extract half of vector.
4703 if (ResVT.getVectorElementType() == MVT::i1)
4704 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4705 (Index == ResVT.getVectorNumElements()));
4706
4707 return (Index % ResVT.getVectorNumElements()) == 0;
4708}
4709
4710bool X86TargetLowering::isCheapToSpeculateCttz() const {
4711 // Speculate cttz only if we can directly use TZCNT.
4712 return Subtarget.hasBMI();
4713}
4714
4715bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4716 // Speculate ctlz only if we can directly use LZCNT.
4717 return Subtarget.hasLZCNT();
4718}
4719
4720bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
4721 EVT BitcastVT) const {
4722 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1)
4723 return false;
4724
4725 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
4726}
4727
4728bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
4729 const SelectionDAG &DAG) const {
4730 // Do not merge to float value size (128 bytes) if no implicit
4731 // float attribute is set.
4732 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
4733 Attribute::NoImplicitFloat);
4734
4735 if (NoFloat) {
4736 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
4737 return (MemVT.getSizeInBits() <= MaxIntSize);
4738 }
4739 return true;
4740}
4741
4742bool X86TargetLowering::isCtlzFast() const {
4743 return Subtarget.hasFastLZCNT();
4744}
4745
4746bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4747 const Instruction &AndI) const {
4748 return true;
4749}
4750
4751bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4752 EVT VT = Y.getValueType();
4753
4754 if (VT.isVector())
4755 return false;
4756
4757 if (!Subtarget.hasBMI())
4758 return false;
4759
4760 // There are only 32-bit and 64-bit forms for 'andn'.
4761 if (VT != MVT::i32 && VT != MVT::i64)
4762 return false;
4763
4764 // A mask and compare against constant is ok for an 'andn' too
4765 // even though the BMI instruction doesn't have an immediate form.
4766
4767 return true;
4768}
4769
4770bool X86TargetLowering::hasAndNot(SDValue Y) const {
4771 EVT VT = Y.getValueType();
4772
4773 if (!VT.isVector()) // x86 can't form 'andn' with an immediate.
4774 return !isa<ConstantSDNode>(Y) && hasAndNotCompare(Y);
4775
4776 // Vector.
4777
4778 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
4779 return false;
4780
4781 if (VT == MVT::v4i32)
4782 return true;
4783
4784 return Subtarget.hasSSE2();
4785}
4786
4787bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
4788 EVT VT = Y.getValueType();
4789
4790 // For vectors, we don't have a preference, but we probably want a mask.
4791 if (VT.isVector())
4792 return false;
4793
4794 // 64-bit shifts on 32-bit targets produce really bad bloated code.
4795 if (VT == MVT::i64 && !Subtarget.is64Bit())
4796 return false;
4797
4798 return true;
4799}
4800
4801MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
4802 MVT VT = MVT::getIntegerVT(NumBits);
4803 if (isTypeLegal(VT))
4804 return VT;
4805
4806 // PMOVMSKB can handle this.
4807 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
4808 return MVT::v16i8;
4809
4810 // VPMOVMSKB can handle this.
4811 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
4812 return MVT::v32i8;
4813
4814 // TODO: Allow 64-bit type for 32-bit target.
4815 // TODO: 512-bit types should be allowed, but make sure that those
4816 // cases are handled in combineVectorSizedSetCCEquality().
4817
4818 return MVT::INVALID_SIMPLE_VALUE_TYPE;
4819}
4820
4821/// Val is the undef sentinel value or equal to the specified value.
4822static bool isUndefOrEqual(int Val, int CmpVal) {
4823 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
4824}
4825
4826/// Val is either the undef or zero sentinel value.
4827static bool isUndefOrZero(int Val) {
4828 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
4829}
4830
4831/// Return true if every element in Mask, beginning
4832/// from position Pos and ending in Pos+Size is the undef sentinel value.
4833static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
4834 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4835 if (Mask[i] != SM_SentinelUndef)
4836 return false;
4837 return true;
4838}
4839
/// Return true if \p Val lies in the half-open range [Low, Hi), i.e.
/// Low <= Val < Hi.
static bool isInRange(int Val, int Low, int Hi) {
  if (Val < Low)
    return false;
  return Val < Hi;
}
4844
4845/// Return true if the value of any element in Mask falls within the
4846/// half-open range [Low, Hi).
4847static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
4848 for (int M : Mask)
4849 if (isInRange(M, Low, Hi))
4850 return true;
4851 return false;
4852}
4853
4854/// Return true if Val is undef or if its value falls within the
4855/// half-open range [Low, Hi).
4856static bool isUndefOrInRange(int Val, int Low, int Hi) {
4857 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
4858}
4859
4860/// Return true if every element in Mask is undef or if its value
4861/// falls within the half-open range [Low, Hi).
4862static bool isUndefOrInRange(ArrayRef<int> Mask,
4863 int Low, int Hi) {
4864 for (int M : Mask)
4865 if (!isUndefOrInRange(M, Low, Hi))
4866 return false;
4867 return true;
4868}
4869
4870/// Return true if Val is undef, zero or if its value falls within the
4871/// half-open range [Low, Hi).
4872static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
4873 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
4874}
4875
4876/// Return true if every element in Mask is undef, zero or if its value
4877/// falls within the half-open range [Low, Hi).
4878static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
4879 for (int M : Mask)
4880 if (!isUndefOrZeroOrInRange(M, Low, Hi))
4881 return false;
4882 return true;
4883}
4884
4885/// Return true if every element of Mask at an index in [Pos, Pos + Size)
4886/// either equals the corresponding term of the sequence
4887/// (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
4888static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
4889 unsigned Size, int Low, int Step = 1) {
4890 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
4891 if (!isUndefOrEqual(Mask[i], Low))
4892 return false;
4893 return true;
4894}
4895
4896/// Return true if every element of Mask at an index in [Pos, Pos + Size)
4897/// either equals the corresponding term of the sequence
4898/// (Low, Low + 1, ..., Low + Size - 1), or is undef, or is zero.
4899static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4900 unsigned Size, int Low) {
4901 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
4902 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
4903 return false;
4904 return true;
4905}
4906
4907/// Return true if every element of Mask at an index in [Pos, Pos + Size)
4908/// is undef or is zero.
4909static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4910 unsigned Size) {
4911 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4912 if (!isUndefOrZero(Mask[i]))
4913 return false;
4914 return true;
4915}
4916
4917/// Helper function to test whether a shuffle mask could be
4918/// simplified by widening the elements being shuffled.
4919///
4920/// On success WidenedMask holds the mask for the wider elements; any previous
4921/// contents are overwritten. On failure it is left in an unspecified state.
4922///
4923/// NOTE: This must handle normal vector shuffle masks and *target* vector
4924/// shuffle masks. The latter have the special property of a '-2' representing
4925/// a zero-ed lane of a vector.
///
/// Elements are read in pairs (Mask[i], Mask[i + 1]) for even i, so Mask is
/// expected to hold an even number of elements.
4926static bool canWidenShuffleElements(ArrayRef<int> Mask,
4927 SmallVectorImpl<int> &WidenedMask) {
4928 WidenedMask.assign(Mask.size() / 2, 0);
4929 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
4930 int M0 = Mask[i];
4931 int M1 = Mask[i + 1];
4932
4933 // If both elements are undef, it's trivial.
4934 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
4935 WidenedMask[i / 2] = SM_SentinelUndef;
4936 continue;
4937 }
4938
4939 // Check for an undef mask and a mask value properly aligned to fit with
4940 // a pair of values. If we find such a case, use the non-undef mask's value.
4941 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
4942 WidenedMask[i / 2] = M1 / 2;
4943 continue;
4944 }
4945 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
4946 WidenedMask[i / 2] = M0 / 2;
4947 continue;
4948 }
4949
4950 // When zeroing, we need to spread the zeroing across both lanes to widen.
4951 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
4952 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
4953 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
4954 WidenedMask[i / 2] = SM_SentinelZero;
4955 continue;
4956 }
4957 return false;
4958 }
4959
4960 // Finally check if the two mask values are adjacent and aligned with
4961 // a pair.
4962 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
4963 WidenedMask[i / 2] = M0 / 2;
4964 continue;
4965 }
4966
4967 // Otherwise we can't safely widen the elements used in this shuffle.
4968 return false;
4969 }
4970 assert(WidenedMask.size() == Mask.size() / 2 &&(static_cast <bool> (WidenedMask.size() == Mask.size() /
2 && "Incorrect size of mask after widening the elements!"
) ? void (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 4971, __extension__ __PRETTY_FUNCTION__))
4971 "Incorrect size of mask after widening the elements!")(static_cast <bool> (WidenedMask.size() == Mask.size() /
2 && "Incorrect size of mask after widening the elements!"
) ? void (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 4971, __extension__ __PRETTY_FUNCTION__))
;
4972
4973 return true;
4974}
4975
/// Overload that takes zeroable-element information into account: a copy of
/// Mask is made in which every non-undef element whose bit is set in Zeroable
/// is replaced by the zero sentinel, and that copy is then tested for
/// widening. Undef elements are left untouched.
4976static bool canWidenShuffleElements(ArrayRef<int> Mask,
4977 const APInt &Zeroable,
4978 SmallVectorImpl<int> &WidenedMask) {
4979 SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
4980 for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
4981 if (TargetMask[i] == SM_SentinelUndef)
4982 continue;
4983 if (Zeroable[i])
4984 TargetMask[i] = SM_SentinelZero;
4985 }
4986 return canWidenShuffleElements(TargetMask, WidenedMask);
4987}
4988
/// Overload for callers that only need the yes/no answer: the widened mask is
/// computed into a local vector and discarded.
4989static bool canWidenShuffleElements(ArrayRef<int> Mask) {
4990 SmallVector<int, 32> WidenedMask;
4991 return canWidenShuffleElements(Mask, WidenedMask);
4992}
4993
4994/// Returns true if Elt is a constant zero or a floating point constant +0.0,
/// i.e. if either isNullConstant(Elt) or isNullFPConstant(Elt) holds.
4995bool X86::isZeroNode(SDValue Elt) {
4996 return isNullConstant(Elt) || isNullFPConstant(Elt);
4997}
4998
4999// Build a vector of constants of type VT from the first NumElts of Values.
5000// When IsMask is set, a negative value produces an UNDEF element.
5001// Split 64-bit constants in the 32-bit mode.
// "Split" applies when i64 is not a legal type and VT has i64 elements: the
// vector is built with twice as many i32 elements and bitcast back to VT.
5002static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5003 const SDLoc &dl, bool IsMask = false) {
5004
5005 SmallVector<SDValue, 32> Ops;
5006 bool Split = false;
5007
5008 MVT ConstVecVT = VT;
5009 unsigned NumElts = VT.getVectorNumElements();
5010 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5011 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5012 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5013 Split = true;
5014 }
5015
5016 MVT EltVT = ConstVecVT.getVectorElementType();
5017 for (unsigned i = 0; i < NumElts; ++i) {
5018 bool IsUndef = Values[i] < 0 && IsMask;
5019 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5020 DAG.getConstant(Values[i], dl, EltVT);
5021 Ops.push_back(OpNode);
// When splitting, the second i32 of each pair is UNDEF for an undef element
// and the constant 0 otherwise.
5022 if (Split)
5023 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5024 DAG.getConstant(0, dl, EltVT));
5025 }
5026 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5027 if (Split)
5028 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5029 return ConstsNode;
5030}
5031
// Build a vector of constants of type VT from per-element bit patterns.
// Bits[i] supplies the bits of element i; if bit i of Undefs is set, element i
// is UNDEF instead. Bits.size() must equal Undefs.getBitWidth() (asserted).
// When i64 is not a legal type and VT has i64 elements, each element is split
// into two i32 constants (low 32 bits first, then the high 32 bits).
// f32/f64 elements are created as floating-point constants from the bits.
// The build vector is bitcast to VT before being returned.
5032static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5033 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5034 assert(Bits.size() == Undefs.getBitWidth() &&(static_cast <bool> (Bits.size() == Undefs.getBitWidth(
) && "Unequal constant and undef arrays") ? void (0) :
__assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5035, __extension__ __PRETTY_FUNCTION__))
5035 "Unequal constant and undef arrays")(static_cast <bool> (Bits.size() == Undefs.getBitWidth(
) && "Unequal constant and undef arrays") ? void (0) :
__assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5035, __extension__ __PRETTY_FUNCTION__))
;
5036 SmallVector<SDValue, 32> Ops;
5037 bool Split = false;
5038
5039 MVT ConstVecVT = VT;
5040 unsigned NumElts = VT.getVectorNumElements();
5041 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5042 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5043 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5044 Split = true;
5045 }
5046
5047 MVT EltVT = ConstVecVT.getVectorElementType();
5048 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5049 if (Undefs[i]) {
// An undef element occupies two i32 slots when splitting, one otherwise.
5050 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5051 continue;
5052 }
5053 const APInt &V = Bits[i];
5054 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")(static_cast <bool> (V.getBitWidth() == VT.getScalarSizeInBits
() && "Unexpected sizes") ? void (0) : __assert_fail (
"V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5054, __extension__ __PRETTY_FUNCTION__))
;
5055 if (Split) {
5056 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5057 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5058 } else if (EltVT == MVT::f32) {
5059 APFloat FV(APFloat::IEEEsingle(), V);
5060 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5061 } else if (EltVT == MVT::f64) {
5062 APFloat FV(APFloat::IEEEdouble(), V);
5063 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5064 } else {
5065 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5066 }
5067 }
5068
5069 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5070 return DAG.getBitcast(VT, ConstsNode);
5071}
5072
5073/// Returns a vector of specified type with all zero elements.
/// VT must be a 128-, 256- or 512-bit vector, or have i1 elements (asserted).
/// The zero is built in one of three forms and then bitcast to VT:
/// v4f32 +0.0 for a 128-bit VT on a subtarget without SSE2, a zero of VT
/// itself for i1-element vectors, and a vector of i32 zeros of the same total
/// size in every other case.
5074static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5075 SelectionDAG &DAG, const SDLoc &dl) {
5076 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector() || VT.getVectorElementType() == MVT
::i1) && "Unexpected vector type") ? void (0) : __assert_fail
("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5078, __extension__ __PRETTY_FUNCTION__))
5077 VT.getVectorElementType() == MVT::i1) &&(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector() || VT.getVectorElementType() == MVT
::i1) && "Unexpected vector type") ? void (0) : __assert_fail
("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5078, __extension__ __PRETTY_FUNCTION__))
5078 "Unexpected vector type")(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector() || VT.getVectorElementType() == MVT
::i1) && "Unexpected vector type") ? void (0) : __assert_fail
("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5078, __extension__ __PRETTY_FUNCTION__))
;
5079
5080 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5081 // type. This ensures they get CSE'd. But if the integer type is not
5082 // available, use a floating-point +0.0 instead.
5083 SDValue Vec;
5084 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5085 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5086 } else if (VT.getVectorElementType() == MVT::i1) {
5087 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(static_cast <bool> ((Subtarget.hasBWI() || VT.getVectorNumElements
() <= 16) && "Unexpected vector type") ? void (0) :
__assert_fail ("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5088, __extension__ __PRETTY_FUNCTION__))
5088 "Unexpected vector type")(static_cast <bool> ((Subtarget.hasBWI() || VT.getVectorNumElements
() <= 16) && "Unexpected vector type") ? void (0) :
__assert_fail ("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5088, __extension__ __PRETTY_FUNCTION__))
;
5089 Vec = DAG.getConstant(0, dl, VT);
5090 } else {
5091 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5092 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5093 }
5094 return DAG.getBitcast(VT, Vec);
5095}
5096
/// Extract from Vec the vectorWidth-bit chunk that contains element IdxVal.
/// The result has Vec's element type and (number of elements in Vec) /
/// (size of Vec / vectorWidth) elements. IdxVal is rounded down to the first
/// element of its chunk. A BUILD_VECTOR input is sliced into a smaller build
/// vector; any other input produces an EXTRACT_SUBVECTOR node.
5097static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5098 const SDLoc &dl, unsigned vectorWidth) {
5099 EVT VT = Vec.getValueType();
5100 EVT ElVT = VT.getVectorElementType();
5101 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5102 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5103 VT.getVectorNumElements()/Factor);
5104
5105 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5106 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5107 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")(static_cast <bool> (isPowerOf2_32(ElemsPerChunk) &&
"Elements per chunk not power of 2") ? void (0) : __assert_fail
("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5107, __extension__ __PRETTY_FUNCTION__))
;
5108
5109 // This is the index of the first element of the vectorWidth-bit chunk
5110 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5111 IdxVal &= ~(ElemsPerChunk - 1);
5112
5113 // If the input is a buildvector just emit a smaller one.
5114 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5115 return DAG.getBuildVector(ResultVT, dl,
5116 Vec->ops().slice(IdxVal, ElemsPerChunk));
5117
5118 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5119 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5120}
5121
5122/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5123/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5124/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5125/// instructions or a simple subregister reference. IdxVal is the index of an
5126/// element inside the 128 bits we want. It need not be aligned to a 128-bit
5127/// boundary. That makes lowering EXTRACT_VECTOR_ELT operations easier.
/// Vec must be a 256-bit or 512-bit vector (asserted).
5128static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5129 SelectionDAG &DAG, const SDLoc &dl) {
5130 assert((Vec.getValueType().is256BitVector() ||(static_cast <bool> ((Vec.getValueType().is256BitVector
() || Vec.getValueType().is512BitVector()) && "Unexpected vector size!"
) ? void (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5131, __extension__ __PRETTY_FUNCTION__))
5131 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(static_cast <bool> ((Vec.getValueType().is256BitVector
() || Vec.getValueType().is512BitVector()) && "Unexpected vector size!"
) ? void (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5131, __extension__ __PRETTY_FUNCTION__))
;
5132 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5133}
5134
5135/// Generate a DAG to grab 256-bits from a 512-bit vector.
/// Vec must be a 512-bit vector (asserted). IdxVal is the index of an element
/// inside the 256 bits to extract; extractSubVector rounds it down to the
/// start of that chunk.
5136static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5137 SelectionDAG &DAG, const SDLoc &dl) {
5138 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")(static_cast <bool> (Vec.getValueType().is512BitVector(
) && "Unexpected vector size!") ? void (0) : __assert_fail
("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5138, __extension__ __PRETTY_FUNCTION__))
;
5139 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5140}
5141
/// Insert Vec into Result at the vectorWidth-bit chunk that contains element
/// IdxVal, producing an INSERT_SUBVECTOR node of Result's type.
/// vectorWidth must be 128 or 256 (asserted). IdxVal is rounded down to the
/// first element of its chunk. If Vec is undef, Result is returned unchanged.
5142static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5143 SelectionDAG &DAG, const SDLoc &dl,
5144 unsigned vectorWidth) {
5145 assert((vectorWidth == 128 || vectorWidth == 256) &&(static_cast <bool> ((vectorWidth == 128 || vectorWidth
== 256) && "Unsupported vector width") ? void (0) : __assert_fail
("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5146, __extension__ __PRETTY_FUNCTION__))
5146 "Unsupported vector width")(static_cast <bool> ((vectorWidth == 128 || vectorWidth
== 256) && "Unsupported vector width") ? void (0) : __assert_fail
("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5146, __extension__ __PRETTY_FUNCTION__))
;
5147 // Inserting UNDEF leaves Result unchanged.
5148 if (Vec.isUndef())
5149 return Result;
5150 EVT VT = Vec.getValueType();
5151 EVT ElVT = VT.getVectorElementType();
5152 EVT ResultVT = Result.getValueType();
5153
5154 // Insert the relevant vectorWidth bits.
5155 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5156 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")(static_cast <bool> (isPowerOf2_32(ElemsPerChunk) &&
"Elements per chunk not power of 2") ? void (0) : __assert_fail
("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5156, __extension__ __PRETTY_FUNCTION__))
;
5157
5158 // This is the index of the first element of the vectorWidth-bit chunk
5159 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5160 IdxVal &= ~(ElemsPerChunk - 1);
5161
5162 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5163 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5164}
5165
5166/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5167/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5168/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5169/// simple superregister reference. IdxVal is the index of an element inside
5170/// the 128 bits we want. It need not be aligned to a 128-bit boundary. That
5171/// makes lowering INSERT_VECTOR_ELT operations easier.
/// Vec must be a 128-bit vector (asserted).
5172static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5173 SelectionDAG &DAG, const SDLoc &dl) {
5174 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")(static_cast <bool> (Vec.getValueType().is128BitVector(
) && "Unexpected vector size!") ? void (0) : __assert_fail
("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5174, __extension__ __PRETTY_FUNCTION__))
;
5175 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5176}
5177
5178/// Widen a vector to a larger size with the same scalar type, with the new
5179/// elements either zero or undef.
/// Vec must be narrower than VT and share its scalar type (asserted). Vec is
/// inserted at element index 0 of a zero vector (ZeroNewElements == true) or
/// of an UNDEF vector (ZeroNewElements == false) of type VT.
5180static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5181 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5182 const SDLoc &dl) {
5183 assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&(static_cast <bool> (Vec.getValueSizeInBits() < VT.getSizeInBits
() && Vec.getValueType().getScalarType() == VT.getScalarType
() && "Unsupported vector widening type") ? void (0) :
__assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5185, __extension__ __PRETTY_FUNCTION__))
5184 Vec.getValueType().getScalarType() == VT.getScalarType() &&(static_cast <bool> (Vec.getValueSizeInBits() < VT.getSizeInBits
() && Vec.getValueType().getScalarType() == VT.getScalarType
() && "Unsupported vector widening type") ? void (0) :
__assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5185, __extension__ __PRETTY_FUNCTION__))
5185 "Unsupported vector widening type")(static_cast <bool> (Vec.getValueSizeInBits() < VT.getSizeInBits
() && Vec.getValueType().getScalarType() == VT.getScalarType
() && "Unsupported vector widening type") ? void (0) :
__assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5185, __extension__ __PRETTY_FUNCTION__))
;
5186 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5187 : DAG.getUNDEF(VT);
5188 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5189 DAG.getIntPtrConstant(0, dl));
5190}
5191
5192// Helper for splitting operands of an operation to legal target size and
5193// applying a function on each part.
5194// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5195// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
5196// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5197// The argument Builder is a function that will be applied on each split part:
5198// SDValue Builder(SelectionDAG&G, SDLoc, ArrayRef<SDValue>)
//
// The maximum part size is chosen as follows:
//  - 512 bits if CheckBWI is true and Subtarget.useBWIRegs(), or if CheckBWI
//    is false and Subtarget.useAVX512Regs();
//  - otherwise 256 bits if Subtarget.hasAVX2();
//  - otherwise 128 bits.
// If VT fits in one part, Builder is called once on Ops and its result is
// returned directly. Otherwise every operand is split into the same number of
// equal sub-vectors, Builder is called once per part, and the results are
// joined with CONCAT_VECTORS into a value of type VT.
5199template <typename F>
5200SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5201 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
5202 F Builder, bool CheckBWI = true) {
5203 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2")(static_cast <bool> (Subtarget.hasSSE2() && "Target assumed to support at least SSE2"
) ? void (0) : __assert_fail ("Subtarget.hasSSE2() && \"Target assumed to support at least SSE2\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5203, __extension__ __PRETTY_FUNCTION__))
;
5204 unsigned NumSubs = 1;
5205 if ((CheckBWI && Subtarget.useBWIRegs()) ||
5206 (!CheckBWI && Subtarget.useAVX512Regs())) {
5207 if (VT.getSizeInBits() > 512) {
5208 NumSubs = VT.getSizeInBits() / 512;
5209 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size")(static_cast <bool> ((VT.getSizeInBits() % 512) == 0 &&
"Illegal vector size") ? void (0) : __assert_fail ("(VT.getSizeInBits() % 512) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5209, __extension__ __PRETTY_FUNCTION__))
;
5210 }
5211 } else if (Subtarget.hasAVX2()) {
5212 if (VT.getSizeInBits() > 256) {
5213 NumSubs = VT.getSizeInBits() / 256;
5214 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size")(static_cast <bool> ((VT.getSizeInBits() % 256) == 0 &&
"Illegal vector size") ? void (0) : __assert_fail ("(VT.getSizeInBits() % 256) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5214, __extension__ __PRETTY_FUNCTION__))
;
5215 }
5216 } else {
5217 if (VT.getSizeInBits() > 128) {
5218 NumSubs = VT.getSizeInBits() / 128;
5219 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size")(static_cast <bool> ((VT.getSizeInBits() % 128) == 0 &&
"Illegal vector size") ? void (0) : __assert_fail ("(VT.getSizeInBits() % 128) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5219, __extension__ __PRETTY_FUNCTION__))
;
5220 }
5221 }
5222
5223 if (NumSubs == 1)
5224 return Builder(DAG, DL, Ops);
5225
5226 SmallVector<SDValue, 4> Subs;
5227 for (unsigned i = 0; i != NumSubs; ++i) {
5228 SmallVector<SDValue, 2> SubOps;
5229 for (SDValue Op : Ops) {
// Each operand is split by its own type, so operands whose type differs from
// VT still yield NumSubs equal parts.
5230 EVT OpVT = Op.getValueType();
5231 unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
5232 unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
5233 SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
5234 }
5235 Subs.push_back(Builder(DAG, DL, SubOps));
5236 }
5237 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5238}
5239
5240// Return true if the instruction zeroes the unused upper part of the
5241// destination and accepts mask.
// The opcodes reported as such are X86ISD::CMPM, X86ISD::CMPM_RND and
// ISD::SETCC; every other opcode yields false.
5242static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
5243 switch (Opcode) {
5244 default:
5245 return false;
5246 case X86ISD::CMPM:
5247 case X86ISD::CMPM_RND:
5248 case ISD::SETCC:
5249 return true;
5250 }
5251}
5252
5253/// Insert an i1-subvector into an i1-vector.
/// Op supplies three operands: the destination vector, the subvector to
/// insert and the insertion index (presumably Op is an ISD::INSERT_SUBVECTOR
/// node, as the assert message below suggests). Returns an empty SDValue when
/// the index is not a constant, the destination vector itself when the
/// subvector is undef, and Op unchanged when inserting at index 0 into an
/// undef vector. All other cases are expanded with KSHIFTL/KSHIFTR and
/// OR/XOR on a type widened to v8i1 or v16i1 where needed, and the result is
/// extracted back to Op's type.
5254static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5255 const X86Subtarget &Subtarget) {
5256
5257 SDLoc dl(Op);
5258 SDValue Vec = Op.getOperand(0);
5259 SDValue SubVec = Op.getOperand(1);
5260 SDValue Idx = Op.getOperand(2);
5261
5262 if (!isa<ConstantSDNode>(Idx))
5263 return SDValue();
5264
5265 // Inserting undef is a nop. We can just return the original vector.
5266 if (SubVec.isUndef())
5267 return Vec;
5268
5269 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5270 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5271 return Op;
5272
5273 MVT OpVT = Op.getSimpleValueType();
5274 unsigned NumElems = OpVT.getVectorNumElements();
5275
5276 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5277
5278 // Extend to natively supported kshift.
5279 MVT WideOpVT = OpVT;
5280 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5281 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5282
5283 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5284 // if necessary.
5285 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5286 // May need to promote to a legal type.
5287 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5288 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5289 SubVec, Idx);
5290 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5291 }
5292
5293 MVT SubVecVT = SubVec.getSimpleValueType();
5294 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5295
5296 assert(IdxVal + SubVecNumElems <= NumElems &&(static_cast <bool> (IdxVal + SubVecNumElems <= NumElems
&& IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR") ? void (0) : __assert_fail
("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5298, __extension__ __PRETTY_FUNCTION__))
5297 IdxVal % SubVecVT.getSizeInBits() == 0 &&(static_cast <bool> (IdxVal + SubVecNumElems <= NumElems
&& IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR") ? void (0) : __assert_fail
("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5298, __extension__ __PRETTY_FUNCTION__))
5298 "Unexpected index value in INSERT_SUBVECTOR")(static_cast <bool> (IdxVal + SubVecNumElems <= NumElems
&& IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR") ? void (0) : __assert_fail
("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5298, __extension__ __PRETTY_FUNCTION__))
;
5299
5300 SDValue Undef = DAG.getUNDEF(WideOpVT);
5301
5302 if (IdxVal == 0) {
5303 // Zero lower bits of the Vec
5304 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5305 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
5306 ZeroIdx);
5307 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5308 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5309 // Merge them together, SubVec should be zero extended.
5310 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5311 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5312 SubVec, ZeroIdx);
5313 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5314 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5315 }
5316
// From here on IdxVal != 0. Place SubVec in the low elements of a wide
// undef vector so that it can be shifted into position.
5317 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5318 Undef, SubVec, ZeroIdx);
5319
5320 if (Vec.isUndef()) {
5321 assert(IdxVal != 0 && "Unexpected index")(static_cast <bool> (IdxVal != 0 && "Unexpected index"
) ? void (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5321, __extension__ __PRETTY_FUNCTION__))
;
5322 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5323 DAG.getConstant(IdxVal, dl, MVT::i8));
5324 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5325 }
5326
5327 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5328 assert(IdxVal != 0 && "Unexpected index")(static_cast <bool> (IdxVal != 0 && "Unexpected index"
) ? void (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5328, __extension__ __PRETTY_FUNCTION__))
;
// Shift SubVec to the top of the wide vector, then back down to IdxVal.
5329 NumElems = WideOpVT.getVectorNumElements();
5330 unsigned ShiftLeft = NumElems - SubVecNumElems;
5331 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5332 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5333 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5334 if (ShiftRight != 0)
5335 SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
5336 DAG.getConstant(ShiftRight, dl, MVT::i8));
5337 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5338 }
5339
5340 // Simple case when we put subvector in the upper part
5341 if (IdxVal + SubVecNumElems == NumElems) {
5342 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5343 DAG.getConstant(IdxVal, dl, MVT::i8));
5344 if (SubVecNumElems * 2 == NumElems) {
5345 // Special case, use legal zero extending insert_subvector. This allows
5346 // isel to optimize when bits are known zero.
5347 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
5348 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5349 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5350 Vec, ZeroIdx);
5351 } else {
5352 // Otherwise use explicit shifts to zero the bits.
5353 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5354 Undef, Vec, ZeroIdx);
5355 NumElems = WideOpVT.getVectorNumElements();
5356 SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
5357 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5358 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5359 }
5360 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5361 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5362 }
5363
5364 // Inserting into the middle is more complicated.
5365
5366 NumElems = WideOpVT.getVectorNumElements();
5367
5368 // Widen the vector if needed.
5369 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5370 // Move the current value of the bits to be replaced to the lsbs.
5371 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5372 DAG.getConstant(IdxVal, dl, MVT::i8));
5373 // Xor with the new bit.
5374 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
5375 // Shift to MSB, filling bottom bits with 0.
5376 unsigned ShiftLeft = NumElems - SubVecNumElems;
5377 Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
5378 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5379 // Shift to the final position, filling upper bits with 0.
5380 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5381 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
5382 DAG.getConstant(ShiftRight, dl, MVT::i8));
5383 // Xor with original vector leaving the new value.
5384 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
5385 // Reduce to original width if needed.
5386 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5387}
5388
5389static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT,
5390 unsigned NumElems, SelectionDAG &DAG,
5391 const SDLoc &dl, unsigned VectorWidth) {
5392 SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth);
5393 return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth);
5394}
5395
5396/// Returns a vector of specified type with all bits set.
5397/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5398/// Then bitcast to their original type, ensuring they get CSE'd.
5399static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5400 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector()) && "Expected a 128/256/512-bit vector type"
) ? void (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5401, __extension__ __PRETTY_FUNCTION__))
5401 "Expected a 128/256/512-bit vector type")(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector()) && "Expected a 128/256/512-bit vector type"
) ? void (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5401, __extension__ __PRETTY_FUNCTION__))
;
5402
5403 APInt Ones = APInt::getAllOnesValue(32);
5404 unsigned NumElts = VT.getSizeInBits() / 32;
5405 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5406 return DAG.getBitcast(VT, Vec);
5407}
5408
5409static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
5410 SelectionDAG &DAG) {
5411 EVT InVT = In.getValueType();
5412 assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode")(static_cast <bool> ((X86ISD::VSEXT == Opc || X86ISD::VZEXT
== Opc) && "Unexpected opcode") ? void (0) : __assert_fail
("(X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && \"Unexpected opcode\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5412, __extension__ __PRETTY_FUNCTION__))
;
5413
5414 if (VT.is128BitVector() && InVT.is128BitVector())
5415 return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
5416 : DAG.getZeroExtendVectorInReg(In, DL, VT);
5417
5418 // For 256-bit vectors, we only need the lower (128-bit) input half.
5419 // For 512-bit vectors, we only need the lower input half or quarter.
5420 if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
5421 int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5422 In = extractSubVector(In, 0, DAG, DL,
5423 std::max(128, (int)VT.getSizeInBits() / Scale));
5424 }
5425
5426 return DAG.getNode(Opc, DL, VT, In);
5427}
5428
5429/// Returns a vector_shuffle node for an unpackl operation.
5430static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5431 SDValue V1, SDValue V2) {
5432 SmallVector<int, 8> Mask;
5433 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5434 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5435}
5436
5437/// Returns a vector_shuffle node for an unpackh operation.
5438static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5439 SDValue V1, SDValue V2) {
5440 SmallVector<int, 8> Mask;
5441 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5442 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5443}
5444
5445/// Return a vector_shuffle of the specified vector of zero or undef vector.
5446/// This produces a shuffle where the low element of V2 is swizzled into the
5447/// zero/undef vector, landing at element Idx.
5448/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
5449static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5450 bool IsZero,
5451 const X86Subtarget &Subtarget,
5452 SelectionDAG &DAG) {
5453 MVT VT = V2.getSimpleValueType();
5454 SDValue V1 = IsZero
5455 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5456 int NumElems = VT.getVectorNumElements();
5457 SmallVector<int, 16> MaskVec(NumElems);
5458 for (int i = 0; i != NumElems; ++i)
5459 // If this is the insertion idx, put the low elt of V2 here.
5460 MaskVec[i] = (i == Idx) ? NumElems : i;
5461 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5462}
5463
5464static SDValue peekThroughBitcasts(SDValue V) {
5465 while (V.getNode() && V.getOpcode() == ISD::BITCAST)
5466 V = V.getOperand(0);
5467 return V;
5468}
5469
5470static SDValue peekThroughOneUseBitcasts(SDValue V) {
5471 while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
5472 V.getOperand(0).hasOneUse())
5473 V = V.getOperand(0);
5474 return V;
5475}
5476
5477// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
5478static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
5479 while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
5480 V = V.getOperand(0);
5481 return V;
5482}
5483
5484static const Constant *getTargetConstantFromNode(SDValue Op) {
5485 Op = peekThroughBitcasts(Op);
5486
5487 auto *Load = dyn_cast<LoadSDNode>(Op);
5488 if (!Load)
5489 return nullptr;
5490
5491 SDValue Ptr = Load->getBasePtr();
5492 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5493 Ptr->getOpcode() == X86ISD::WrapperRIP)
5494 Ptr = Ptr->getOperand(0);
5495
5496 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5497 if (!CNode || CNode->isMachineConstantPoolEntry())
5498 return nullptr;
5499
5500 return dyn_cast<Constant>(CNode->getConstVal());
5501}
5502
5503// Extract raw constant bits from constant pools.
5504static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5505 APInt &UndefElts,
5506 SmallVectorImpl<APInt> &EltBits,
5507 bool AllowWholeUndefs = true,
5508 bool AllowPartialUndefs = true) {
5509 assert(EltBits.empty() && "Expected an empty EltBits vector")(static_cast <bool> (EltBits.empty() && "Expected an empty EltBits vector"
) ? void (0) : __assert_fail ("EltBits.empty() && \"Expected an empty EltBits vector\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5509, __extension__ __PRETTY_FUNCTION__))
;
5510
5511 Op = peekThroughBitcasts(Op);
5512
5513 EVT VT = Op.getValueType();
5514 unsigned SizeInBits = VT.getSizeInBits();
5515 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!")(static_cast <bool> ((SizeInBits % EltSizeInBits) == 0 &&
"Can't split constant!") ? void (0) : __assert_fail ("(SizeInBits % EltSizeInBits) == 0 && \"Can't split constant!\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5515, __extension__ __PRETTY_FUNCTION__))
;
5516 unsigned NumElts = SizeInBits / EltSizeInBits;
5517
5518 // Bitcast a source array of element bits to the target size.
5519 auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
5520 unsigned NumSrcElts = UndefSrcElts.getBitWidth();
5521 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
5522 assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&(static_cast <bool> ((NumSrcElts * SrcEltSizeInBits) ==
SizeInBits && "Constant bit sizes don't match") ? void
(0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5523, __extension__ __PRETTY_FUNCTION__))
5523 "Constant bit sizes don't match")(static_cast <bool> ((NumSrcElts * SrcEltSizeInBits) ==
SizeInBits && "Constant bit sizes don't match") ? void
(0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5523, __extension__ __PRETTY_FUNCTION__))
;
5524
5525 // Don't split if we don't allow undef bits.
5526 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5527 if (UndefSrcElts.getBoolValue() && !AllowUndefs)
5528 return false;
5529
5530 // If we're already the right size, don't bother bitcasting.
5531 if (NumSrcElts == NumElts) {
5532 UndefElts = UndefSrcElts;
5533 EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
5534 return true;
5535 }
5536
5537 // Extract all the undef/constant element data and pack into single bitsets.
5538 APInt UndefBits(SizeInBits, 0);
5539 APInt MaskBits(SizeInBits, 0);
5540
5541 for (unsigned i = 0; i != NumSrcElts; ++i) {
5542 unsigned BitOffset = i * SrcEltSizeInBits;
5543 if (UndefSrcElts[i])
5544 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5545 MaskBits.insertBits(SrcEltBits[i], BitOffset);
5546 }
5547
5548 // Split the undef/constant single bitset data into the target elements.
5549 UndefElts = APInt(NumElts, 0);
5550 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5551
5552 for (unsigned i = 0; i != NumElts; ++i) {
5553 unsigned BitOffset = i * EltSizeInBits;
5554 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5555
5556 // Only treat an element as UNDEF if all bits are UNDEF.
5557 if (UndefEltBits.isAllOnesValue()) {
5558 if (!AllowWholeUndefs)
5559 return false;
5560 UndefElts.setBit(i);
5561 continue;
5562 }
5563
5564 // If only some bits are UNDEF then treat them as zero (or bail if not
5565 // supported).
5566 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5567 return false;
5568
5569 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5570 EltBits[i] = Bits.getZExtValue();
5571 }
5572 return true;
5573 };
5574
5575 // Collect constant bits and insert into mask/undef bit masks.
5576 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5577 unsigned UndefBitIndex) {
5578 if (!Cst)
5579 return false;
5580 if (isa<UndefValue>(Cst)) {
5581 Undefs.setBit(UndefBitIndex);
5582 return true;
5583 }
5584 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5585 Mask = CInt->getValue();
5586 return true;
5587 }
5588 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5589 Mask = CFP->getValueAPF().bitcastToAPInt();
5590 return true;
5591 }
5592 return false;
5593 };
5594
5595 // Handle UNDEFs.
5596 if (Op.isUndef()) {
5597 APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
5598 SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
5599 return CastBitData(UndefSrcElts, SrcEltBits);
5600 }
5601
5602 // Extract scalar constant bits.
5603 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
5604 APInt UndefSrcElts = APInt::getNullValue(1);
5605 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
5606 return CastBitData(UndefSrcElts, SrcEltBits);
5607 }
5608 if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
5609 APInt UndefSrcElts = APInt::getNullValue(1);
5610 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5611 SmallVector<APInt, 64> SrcEltBits(1, RawBits);
5612 return CastBitData(UndefSrcElts, SrcEltBits);
5613 }
5614
5615 // Extract constant bits from build vector.
5616 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5617 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5618 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5619
5620 APInt UndefSrcElts(NumSrcElts, 0);
5621 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5622 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5623 const SDValue &Src = Op.getOperand(i);
5624 if (Src.isUndef()) {
5625 UndefSrcElts.setBit(i);
5626 continue;
5627 }
5628 auto *Cst = cast<ConstantSDNode>(Src);
5629 SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5630 }
5631 return CastBitData(UndefSrcElts, SrcEltBits);
5632 }
5633
5634 // Extract constant bits from constant pool vector.
5635 if (auto *Cst = getTargetConstantFromNode(Op)) {
5636 Type *CstTy = Cst->getType();
5637 if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
5638 return false;
5639
5640 unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
5641 unsigned NumSrcElts = CstTy->getVectorNumElements();
5642
5643 APInt UndefSrcElts(NumSrcElts, 0);
5644 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5645 for (unsigned i = 0; i != NumSrcElts; ++i)
5646 if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
5647 UndefSrcElts, i))
5648 return false;
5649
5650 return CastBitData(UndefSrcElts, SrcEltBits);
5651 }
5652
5653 // Extract constant bits from a broadcasted constant pool scalar.
5654 if (Op.getOpcode() == X86ISD::VBROADCAST &&
5655 EltSizeInBits <= VT.getScalarSizeInBits()) {
5656 if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5657 unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
5658 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5659
5660 APInt UndefSrcElts(NumSrcElts, 0);
5661 SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
5662 if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
5663 if (UndefSrcElts[0])
5664 UndefSrcElts.setBits(0, NumSrcElts);
5665 SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
5666 return CastBitData(UndefSrcElts, SrcEltBits);
5667 }
5668 }
5669 }
5670
5671 // Extract a rematerialized scalar constant insertion.
5672 if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
5673 Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
5674 isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
5675 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5676 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5677
5678 APInt UndefSrcElts(NumSrcElts, 0);
5679 SmallVector<APInt, 64> SrcEltBits;
5680 auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
5681 SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
5682 SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
5683 return CastBitData(UndefSrcElts, SrcEltBits);
5684 }
5685
5686 return false;
5687}
5688
5689static bool getTargetShuffleMaskIndices(SDValue MaskNode,
5690 unsigned MaskEltSizeInBits,
5691 SmallVectorImpl<uint64_t> &RawMask) {
5692 APInt UndefElts;
5693 SmallVector<APInt, 64> EltBits;
5694
5695 // Extract the raw target constant bits.
5696 // FIXME: We currently don't support UNDEF bits or mask entries.
5697 if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
5698 EltBits, /* AllowWholeUndefs */ false,
5699 /* AllowPartialUndefs */ false))
5700 return false;
5701
5702 // Insert the extracted elements into the mask.
5703 for (APInt Elt : EltBits)
5704 RawMask.push_back(Elt.getZExtValue());
5705
5706 return true;
5707}
5708
5709/// Create a shuffle mask that matches the PACKSS/PACKUS truncation.
5710/// Note: This ignores saturation, so inputs must be checked first.
5711static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
5712 bool Unary) {
5713 assert(Mask.empty() && "Expected an empty shuffle mask vector")(static_cast <bool> (Mask.empty() && "Expected an empty shuffle mask vector"
) ? void (0) : __assert_fail ("Mask.empty() && \"Expected an empty shuffle mask vector\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5713, __extension__ __PRETTY_FUNCTION__))
;
5714 unsigned NumElts = VT.getVectorNumElements();
5715 unsigned NumLanes = VT.getSizeInBits() / 128;
5716 unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits();
5717 unsigned Offset = Unary ? 0 : NumElts;
5718
5719 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
5720 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5721 Mask.push_back(Elt + (Lane * NumEltsPerLane));
5722 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5723 Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset);
5724 }
5725}
5726
5727/// Calculates the shuffle mask corresponding to the target-specific opcode.
5728/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
5729/// operands in \p Ops, and returns true.
5730/// Sets \p IsUnary to true if only one source is used. Note that this will set
5731/// IsUnary for shuffles which use a single input multiple times, and in those
5732/// cases it will adjust the mask to only have indices within that single input.
5733/// It is an error to call this with non-empty Mask/Ops vectors.
5734static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
5735 SmallVectorImpl<SDValue> &Ops,
5736 SmallVectorImpl<int> &Mask, bool &IsUnary) {
5737 unsigned NumElems = VT.getVectorNumElements();
5738 SDValue ImmN;
5739
5740 assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector")(static_cast <bool> (Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"
) ? void (0) : __assert_fail ("Mask.empty() && \"getTargetShuffleMask expects an empty Mask vector\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5740, __extension__ __PRETTY_FUNCTION__))
;
5741 assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector")(static_cast <bool> (Ops.empty() && "getTargetShuffleMask expects an empty Ops vector"
) ? void (0) : __assert_fail ("Ops.empty() && \"getTargetShuffleMask expects an empty Ops vector\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5741, __extension__ __PRETTY_FUNCTION__))
;
5742
5743 IsUnary = false;
5744 bool IsFakeUnary = false;
5745 switch(N->getOpcode()) {
5746 case X86ISD::BLENDI:
5747 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5747, __extension__ __PRETTY_FUNCTION__))
;
5748 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5748, __extension__ __PRETTY_FUNCTION__))
;
5749 ImmN = N->getOperand(N->getNumOperands()-1);
5750 DecodeBLENDMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5751 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5752 break;
5753 case X86ISD::SHUFP:
5754 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5754, __extension__ __PRETTY_FUNCTION__))
;
5755 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5755, __extension__ __PRETTY_FUNCTION__))
;
5756 ImmN = N->getOperand(N->getNumOperands()-1);
5757 DecodeSHUFPMask(NumElems, VT.getScalarSizeInBits(),
5758 cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5759 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5760 break;
5761 case X86ISD::INSERTPS:
5762 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5762, __extension__ __PRETTY_FUNCTION__))
;
5763 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5763, __extension__ __PRETTY_FUNCTION__))
;
5764 ImmN = N->getOperand(N->getNumOperands()-1);
5765 DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5766 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5767 break;
5768 case X86ISD::EXTRQI:
5769 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5769, __extension__ __PRETTY_FUNCTION__))
;
5770 if (isa<ConstantSDNode>(N->getOperand(1)) &&
5771 isa<ConstantSDNode>(N->getOperand(2))) {
5772 int BitLen = N->getConstantOperandVal(1);
5773 int BitIdx = N->getConstantOperandVal(2);
5774 DecodeEXTRQIMask(NumElems, VT.getScalarSizeInBits(), BitLen, BitIdx,
5775 Mask);
5776 IsUnary = true;
5777 }
5778 break;
5779 case X86ISD::INSERTQI:
5780 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5780, __extension__ __PRETTY_FUNCTION__))
;
5781 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5781, __extension__ __PRETTY_FUNCTION__))
;
5782 if (isa<ConstantSDNode>(N->getOperand(2)) &&
5783 isa<ConstantSDNode>(N->getOperand(3))) {
5784 int BitLen = N->getConstantOperandVal(2);
5785 int BitIdx = N->getConstantOperandVal(3);
5786 DecodeINSERTQIMask(NumElems, VT.getScalarSizeInBits(), BitLen, BitIdx,
5787 Mask);
5788 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5789 }
5790 break;
5791 case X86ISD::UNPCKH:
5792 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5792, __extension__ __PRETTY_FUNCTION__))
;
5793 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5793, __extension__ __PRETTY_FUNCTION__))
;
5794 DecodeUNPCKHMask(NumElems, VT.getScalarSizeInBits(), Mask);
5795 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5796 break;
5797 case X86ISD::UNPCKL:
5798 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5798, __extension__ __PRETTY_FUNCTION__))
;
5799 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5799, __extension__ __PRETTY_FUNCTION__))
;
5800 DecodeUNPCKLMask(NumElems, VT.getScalarSizeInBits(), Mask);
5801 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5802 break;
5803 case X86ISD::MOVHLPS:
5804 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5804, __extension__ __PRETTY_FUNCTION__))
;
5805 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5805, __extension__ __PRETTY_FUNCTION__))
;
5806 DecodeMOVHLPSMask(NumElems, Mask);
5807 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5808 break;
5809 case X86ISD::MOVLHPS:
5810 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5810, __extension__ __PRETTY_FUNCTION__))
;
5811 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5811, __extension__ __PRETTY_FUNCTION__))
;
5812 DecodeMOVLHPSMask(NumElems, Mask);
5813 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5814 break;
5815 case X86ISD::PALIGNR:
5816 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5816, __extension__ __PRETTY_FUNCTION__))
;
5817 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5817, __extension__ __PRETTY_FUNCTION__))
;
5818 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5818, __extension__ __PRETTY_FUNCTION__))
;
5819 ImmN = N->getOperand(N->getNumOperands()-1);
5820 DecodePALIGNRMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
5821 Mask);
5822 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5823 Ops.push_back(N->getOperand(1));
5824 Ops.push_back(N->getOperand(0));
5825 break;
5826 case X86ISD::VSHLDQ:
5827 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5827, __extension__ __PRETTY_FUNCTION__))
;
5828 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5828, __extension__ __PRETTY_FUNCTION__))
;
5829 ImmN = N->getOperand(N->getNumOperands() - 1);
5830 DecodePSLLDQMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
5831 Mask);
5832 IsUnary = true;
5833 break;
5834 case X86ISD::VSRLDQ:
5835 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5835, __extension__ __PRETTY_FUNCTION__))
;
5836 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5836, __extension__ __PRETTY_FUNCTION__))
;
5837 ImmN = N->getOperand(N->getNumOperands() - 1);
5838 DecodePSRLDQMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
5839 Mask);
5840 IsUnary = true;
5841 break;
5842 case X86ISD::PSHUFD:
5843 case X86ISD::VPERMILPI:
5844 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-7~svn337490/lib/Target/X86/X86ISelLowering.cpp"
, 5844, __extension__ __PRETTY_FUNCTION__))
;
5845 ImmN = N->getOperand(N->getNumOperands()-1);
5846 DecodePSHUFMask(NumElems, VT.getScalarSizeInBits(),