Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1163, column 10
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/include -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem 
/usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-12-11-181444-25759-1 -x c++ /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "Utils/X86ShuffleDecode.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86InstrBuilder.h"
19#include "X86IntrinsicsInfo.h"
20#include "X86MachineFunctionInfo.h"
21#include "X86TargetMachine.h"
22#include "X86TargetObjectFile.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringExtras.h"
27#include "llvm/ADT/StringSwitch.h"
28#include "llvm/Analysis/EHPersonalities.h"
29#include "llvm/CodeGen/IntrinsicLowering.h"
30#include "llvm/CodeGen/MachineFrameInfo.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineJumpTableInfo.h"
34#include "llvm/CodeGen/MachineModuleInfo.h"
35#include "llvm/CodeGen/MachineRegisterInfo.h"
36#include "llvm/CodeGen/TargetLowering.h"
37#include "llvm/CodeGen/WinEHFuncInfo.h"
38#include "llvm/IR/CallSite.h"
39#include "llvm/IR/CallingConv.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DerivedTypes.h"
42#include "llvm/IR/DiagnosticInfo.h"
43#include "llvm/IR/Function.h"
44#include "llvm/IR/GlobalAlias.h"
45#include "llvm/IR/GlobalVariable.h"
46#include "llvm/IR/Instructions.h"
47#include "llvm/IR/Intrinsics.h"
48#include "llvm/MC/MCAsmInfo.h"
49#include "llvm/MC/MCContext.h"
50#include "llvm/MC/MCExpr.h"
51#include "llvm/MC/MCSymbol.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Debug.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/KnownBits.h"
56#include "llvm/Support/MathExtras.h"
57#include "llvm/Target/TargetOptions.h"
58#include <algorithm>
59#include <bitset>
60#include <cctype>
61#include <numeric>
62using namespace llvm;
63
64#define DEBUG_TYPE "x86-isel"
65
66STATISTIC(NumTailCalls, "Number of tail calls"); // expands to: static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls", "Number of tail calls"};
67
68static cl::opt<int> ExperimentalPrefLoopAlignment( // Integer command-line option; cl::init below sets its initial value to 4.
69 "x86-experimental-pref-loop-alignment", cl::init(4),
70 cl::desc( // Per the description text, the value is a log2 byte count.
71 "Sets the preferable loop alignment for experiments (as log2 bytes)"
72 "(the last x86-experimental-pref-loop-alignment bits"
73 " of the loop header PC will be 0)."),
74 cl::Hidden); // NOTE(review): the code that reads this option is not visible in this chunk.
75
76// Added in 10.0. Boolean command-line option, initialized to false via cl::init and registered with cl::Hidden.
77static cl::opt<bool> EnableOldKNLABI(
78 "x86-enable-old-knl-abi", cl::init(false),
79 cl::desc("Enables passing v32i16 and v64i8 in 2 YMM registers instead of "
80 "one ZMM register on AVX512F, but not AVX512BW targets."),
81 cl::Hidden); // NOTE(review): the code that reads this option is not visible in this chunk.
82
83static cl::opt<bool> MulConstantOptimization( // Boolean command-line option; cl::init below sets its initial value to true.
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden); // NOTE(review): the code that reads this option is not visible in this chunk.
88
89static cl::opt<bool> ExperimentalUnorderedISEL( // Boolean command-line option; cl::init below sets its initial value to false.
90 "x86-experimental-unordered-atomic-isel", cl::init(false),
91 cl::desc("Use LoadSDNode and StoreSDNode instead of "
92 "AtomicSDNode for unordered atomic loads and "
93 "stores respectively."),
94 cl::Hidden); // NOTE(review): the code that reads this option is not visible in this chunk.
95
96/// Call this when the user attempts to do something unsupported, like
97/// returning a double without SSE2 enabled on x86_64. It builds a
98/// DiagnosticInfoUnsupported from \p Msg and \p dl's debug location and hands it to the
99/// DAG's context via diagnose(). This is not fatal, unlike report_fatal_error, so calling code should attempt to recover without crashing.
100static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
101 const char *Msg) {
102 MachineFunction &MF = DAG.getMachineFunction(); // Needed only to obtain the function passed to the diagnostic.
103 DAG.getContext()->diagnose(
104 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc())); // Arguments: function, message text, debug location.
105}
106
107X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
108 const X86Subtarget &STI)
109 : TargetLowering(TM), Subtarget(STI) {
110 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
111 X86ScalarSSEf64 = Subtarget.hasSSE2();
112 X86ScalarSSEf32 = Subtarget.hasSSE1();
113 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
114
115 // Set up the TargetLowering object.
116
117 // X86 is weird. It always uses i8 for shift amounts and setcc results.
118 setBooleanContents(ZeroOrOneBooleanContent);
119 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
120 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
121
122 // For 64-bit, since we have so many registers, use the ILP scheduler.
123 // For 32-bit, use the register pressure specific scheduling.
124 // For Atom, always use ILP scheduling.
125 if (Subtarget.isAtom())
126 setSchedulingPreference(Sched::ILP);
127 else if (Subtarget.is64Bit())
128 setSchedulingPreference(Sched::ILP);
129 else
130 setSchedulingPreference(Sched::RegPressure);
131 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
132 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
133
134 // Bypass expensive divides and use cheaper ones.
135 if (TM.getOptLevel() >= CodeGenOpt::Default) {
136 if (Subtarget.hasSlowDivide32())
137 addBypassSlowDiv(32, 8);
138 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
139 addBypassSlowDiv(64, 32);
140 }
141
142 if (Subtarget.isTargetWindowsMSVC() ||
143 Subtarget.isTargetWindowsItanium()) {
144 // Setup Windows compiler runtime calls.
145 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
146 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
147 setLibcallName(RTLIB::SREM_I64, "_allrem");
148 setLibcallName(RTLIB::UREM_I64, "_aullrem");
149 setLibcallName(RTLIB::MUL_I64, "_allmul");
150 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
151 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
152 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
153 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
154 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
155 }
156
157 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
158 // MSVCRT doesn't have powi; fall back to pow
159 setLibcallName(RTLIB::POWI_F32, nullptr);
160 setLibcallName(RTLIB::POWI_F64, nullptr);
161 }
162
163 if (Subtarget.isTargetDarwin()) {
164 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
165 setUseUnderscoreSetJmp(false);
166 setUseUnderscoreLongJmp(false);
167 } else if (Subtarget.isTargetWindowsGNU()) {
168 // MS runtime is weird: it exports _setjmp, but longjmp!
169 setUseUnderscoreSetJmp(true);
170 setUseUnderscoreLongJmp(false);
171 } else {
172 setUseUnderscoreSetJmp(true);
173 setUseUnderscoreLongJmp(true);
174 }
175
176 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
177 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
178 // FIXME: Should we be limiting the atomic size on other configs? Default is
179 // 1024.
180 if (!Subtarget.hasCmpxchg8b())
181 setMaxAtomicSizeInBitsSupported(32);
182
183 // Set up the register classes.
184 addRegisterClass(MVT::i8, &X86::GR8RegClass);
185 addRegisterClass(MVT::i16, &X86::GR16RegClass);
186 addRegisterClass(MVT::i32, &X86::GR32RegClass);
187 if (Subtarget.is64Bit())
188 addRegisterClass(MVT::i64, &X86::GR64RegClass);
189
190 for (MVT VT : MVT::integer_valuetypes())
191 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
192
193 // We don't accept any truncstore of integer registers.
194 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
195 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
196 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
197 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
198 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
199 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
200
201 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
202
203 // SETOEQ and SETUNE require checking two conditions.
204 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
205 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
206 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
207 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
208 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
209 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
210
211 // Integer absolute.
212 if (Subtarget.hasCMov()) {
213 setOperationAction(ISD::ABS , MVT::i16 , Custom);
214 setOperationAction(ISD::ABS , MVT::i32 , Custom);
215 }
216 setOperationAction(ISD::ABS , MVT::i64 , Custom);
217
218 // Funnel shifts.
219 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
220 setOperationAction(ShiftOp , MVT::i16 , Custom);
221 setOperationAction(ShiftOp , MVT::i32 , Custom);
222 if (Subtarget.is64Bit())
223 setOperationAction(ShiftOp , MVT::i64 , Custom);
224 }
225
226 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
227 // operation.
228 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
229 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
230 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
231
232 if (!Subtarget.useSoftFloat()) {
233 // We have an algorithm for SSE2->double, and we turn this into a
234 // 64-bit FILD followed by conditional FADD for other targets.
235 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
236 // We have an algorithm for SSE2, and we turn this into a 64-bit
237 // FILD or VCVTUSI2SS/SD for other targets.
238 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
239 } else {
240 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
241 }
242
243 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
244 // this operation.
245 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
246 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
247
248 if (!Subtarget.useSoftFloat()) {
249 // SSE has no i16 to fp conversion, only i32.
250 if (X86ScalarSSEf32) {
251 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
252 // f32 and f64 cases are Legal, f80 case is not
253 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
254 } else {
255 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
256 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
257 }
258 } else {
259 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
260 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand);
261 }
262
263 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
264 // this operation.
265 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
266 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
267
268 if (!Subtarget.useSoftFloat()) {
269 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
270 // are Legal, f80 is custom lowered.
271 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
272 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
273
274 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
275 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
276 } else {
277 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
278 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
279 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
280 }
281
282 // Handle FP_TO_UINT by promoting the destination to a larger signed
283 // conversion.
284 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
285 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
286 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
287
288 if (!Subtarget.useSoftFloat()) {
289 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
290 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
291 }
292
293 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
294 if (!X86ScalarSSEf64) {
295 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
296 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
297 if (Subtarget.is64Bit()) {
298 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
299 // Without SSE, i64->f64 goes through memory.
300 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
301 }
302 } else if (!Subtarget.is64Bit())
303 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
304
305 // Scalar integer divide and remainder are lowered to use operations that
306 // produce two results, to match the available instructions. This exposes
307 // the two-result form to trivial CSE, which is able to combine x/y and x%y
308 // into a single instruction.
309 //
310 // Scalar integer multiply-high is also lowered to use two-result
311 // operations, to match the available instructions. However, plain multiply
312 // (low) operations are left as Legal, as there are single-result
313 // instructions for this in x86. Using the two-result multiply instructions
314 // when both high and low results are needed must be arranged by dagcombine.
315 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
316 setOperationAction(ISD::MULHS, VT, Expand);
317 setOperationAction(ISD::MULHU, VT, Expand);
318 setOperationAction(ISD::SDIV, VT, Expand);
319 setOperationAction(ISD::UDIV, VT, Expand);
320 setOperationAction(ISD::SREM, VT, Expand);
321 setOperationAction(ISD::UREM, VT, Expand);
322 }
323
324 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
325 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
326 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
327 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
328 setOperationAction(ISD::BR_CC, VT, Expand);
329 setOperationAction(ISD::SELECT_CC, VT, Expand);
330 }
331 if (Subtarget.is64Bit())
332 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
333 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
334 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
335 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
336
337 setOperationAction(ISD::FREM , MVT::f32 , Expand);
338 setOperationAction(ISD::FREM , MVT::f64 , Expand);
339 setOperationAction(ISD::FREM , MVT::f80 , Expand);
340 setOperationAction(ISD::FREM , MVT::f128 , Expand);
341 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
342
343 // Promote the i8 variants and force them on up to i32 which has a shorter
344 // encoding.
345 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
346 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
347 if (!Subtarget.hasBMI()) {
348 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
349 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
350 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
351 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
352 if (Subtarget.is64Bit()) {
353 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
354 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
355 }
356 }
357
358 if (Subtarget.hasLZCNT()) {
359 // When promoting the i8 variants, force them to i32 for a shorter
360 // encoding.
361 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
362 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
363 } else {
364 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
365 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
366 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
367 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
368 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
369 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
370 if (Subtarget.is64Bit()) {
371 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
372 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
373 }
374 }
375
376 // Special handling for half-precision floating point conversions.
377 // If we don't have F16C support, then lower half float conversions
378 // into library calls.
379 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
380 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
381 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
382 }
383
384 // There's never any support for operations beyond MVT::f32.
385 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
386 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
387 setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
388 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
389 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
390 setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
391
392 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
393 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
394 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
395 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
396 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
397 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
398 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
399 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
400
401 if (Subtarget.hasPOPCNT()) {
402 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
403 } else {
404 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
405 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
406 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
407 if (Subtarget.is64Bit())
408 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
409 else
410 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
411 }
412
413 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
414
415 if (!Subtarget.hasMOVBE())
416 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
417
418 // These should be promoted to a larger select which is supported.
419 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
420 // X86 wants to expand cmov itself.
421 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
422 setOperationAction(ISD::SELECT, VT, Custom);
423 setOperationAction(ISD::SETCC, VT, Custom);
424 }
425 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
426 if (VT == MVT::i64 && !Subtarget.is64Bit())
427 continue;
428 setOperationAction(ISD::SELECT, VT, Custom);
429 setOperationAction(ISD::SETCC, VT, Custom);
430 }
431
432 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
433 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
434 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
435
436 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
437 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
438 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
439 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
440 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
441 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
442 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
443 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
444
445 // Darwin ABI issue.
446 for (auto VT : { MVT::i32, MVT::i64 }) {
447 if (VT == MVT::i64 && !Subtarget.is64Bit())
448 continue;
449 setOperationAction(ISD::ConstantPool , VT, Custom);
450 setOperationAction(ISD::JumpTable , VT, Custom);
451 setOperationAction(ISD::GlobalAddress , VT, Custom);
452 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
453 setOperationAction(ISD::ExternalSymbol , VT, Custom);
454 setOperationAction(ISD::BlockAddress , VT, Custom);
455 }
456
457 // 64-bit shl, sra, srl (iff 32-bit x86)
458 for (auto VT : { MVT::i32, MVT::i64 }) {
459 if (VT == MVT::i64 && !Subtarget.is64Bit())
460 continue;
461 setOperationAction(ISD::SHL_PARTS, VT, Custom);
462 setOperationAction(ISD::SRA_PARTS, VT, Custom);
463 setOperationAction(ISD::SRL_PARTS, VT, Custom);
464 }
465
466 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
467 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
468
469 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
470
471 // Expand certain atomics
472 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
473 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
474 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
477 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
478 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
479 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
480 }
481
482 if (!Subtarget.is64Bit())
483 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
484
485 if (Subtarget.hasCmpxchg16b()) {
486 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
487 }
488
489 // FIXME - use subtarget debug flags
490 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
491 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
492 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
493 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
494 }
495
496 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
497 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
498
499 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
500 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
501
502 setOperationAction(ISD::TRAP, MVT::Other, Legal);
503 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
504
505 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
506 setOperationAction(ISD::VASTART , MVT::Other, Custom);
507 setOperationAction(ISD::VAEND , MVT::Other, Expand);
508 bool Is64Bit = Subtarget.is64Bit();
509 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
510 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
511
512 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
513 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
514
515 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
516
517 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
518 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
519 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
520
521 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
522 // f32 and f64 use SSE.
523 // Set up the FP register classes.
524 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
525 : &X86::FR32RegClass);
526 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
527 : &X86::FR64RegClass);
528
529 // Disable f32->f64 extload as we can only generate this in one instruction
530 // under optsize. So it's easier to pattern match (fpext (load)) for that
531 // case instead of needing to emit 2 instructions for extload in the
532 // non-optsize case.
533 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
534
535 for (auto VT : { MVT::f32, MVT::f64 }) {
536 // Use ANDPD to simulate FABS.
537 setOperationAction(ISD::FABS, VT, Custom);
538
539 // Use XORP to simulate FNEG.
540 setOperationAction(ISD::FNEG, VT, Custom);
541
542 // Use ANDPD and ORPD to simulate FCOPYSIGN.
543 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
544
545 // These might be better off as horizontal vector ops.
546 setOperationAction(ISD::FADD, VT, Custom);
547 setOperationAction(ISD::FSUB, VT, Custom);
548
549 // We don't support sin/cos/fmod
550 setOperationAction(ISD::FSIN , VT, Expand);
551 setOperationAction(ISD::FCOS , VT, Expand);
552 setOperationAction(ISD::FSINCOS, VT, Expand);
553 }
554
555 // Lower this to MOVMSK plus an AND.
556 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
557 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
558
559 } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
560 // Use SSE for f32, x87 for f64.
561 // Set up the FP register classes.
562 addRegisterClass(MVT::f32, &X86::FR32RegClass);
563 if (UseX87)
564 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
565
566 // Use ANDPS to simulate FABS.
567 setOperationAction(ISD::FABS , MVT::f32, Custom);
568
569 // Use XORP to simulate FNEG.
570 setOperationAction(ISD::FNEG , MVT::f32, Custom);
571
572 if (UseX87)
573 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
574
575 // Use ANDPS and ORPS to simulate FCOPYSIGN.
576 if (UseX87)
577 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
578 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
579
580 // We don't support sin/cos/fmod
581 setOperationAction(ISD::FSIN , MVT::f32, Expand);
582 setOperationAction(ISD::FCOS , MVT::f32, Expand);
583 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
584
585 if (UseX87) {
586 // Always expand sin/cos functions even though x87 has an instruction.
587 setOperationAction(ISD::FSIN, MVT::f64, Expand);
588 setOperationAction(ISD::FCOS, MVT::f64, Expand);
589 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
590 }
591 } else if (UseX87) {
592 // f32 and f64 in x87.
593 // Set up the FP register classes.
594 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
595 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
596
597 for (auto VT : { MVT::f32, MVT::f64 }) {
598 setOperationAction(ISD::UNDEF, VT, Expand);
599 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
600
601 // Always expand sin/cos functions even though x87 has an instruction.
602 setOperationAction(ISD::FSIN , VT, Expand);
603 setOperationAction(ISD::FCOS , VT, Expand);
604 setOperationAction(ISD::FSINCOS, VT, Expand);
605 }
606 }
607
608 // Expand FP32 immediates into loads from the stack, save special cases.
609 if (isTypeLegal(MVT::f32)) {
610 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
611 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
612 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
613 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
614 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
615 } else // SSE immediates.
616 addLegalFPImmediate(APFloat(+0.0f)); // xorps
617 }
618 // Expand FP64 immediates into loads from the stack, save special cases.
619 if (isTypeLegal(MVT::f64)) {
620 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
621 addLegalFPImmediate(APFloat(+0.0)); // FLD0
622 addLegalFPImmediate(APFloat(+1.0)); // FLD1
623 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
624 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
625 } else // SSE immediates.
626 addLegalFPImmediate(APFloat(+0.0)); // xorpd
627 }
628
629 // We don't support FMA.
630 setOperationAction(ISD::FMA, MVT::f64, Expand);
631 setOperationAction(ISD::FMA, MVT::f32, Expand);
632
633 // f80 always uses X87.
634 if (UseX87) {
635 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
636 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
637 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
638 {
639 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
640 addLegalFPImmediate(TmpFlt); // FLD0
641 TmpFlt.changeSign();
642 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
643
644 bool ignored;
645 APFloat TmpFlt2(+1.0);
646 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
647 &ignored);
648 addLegalFPImmediate(TmpFlt2); // FLD1
649 TmpFlt2.changeSign();
650 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
651 }
652
653 // Always expand sin/cos functions even though x87 has an instruction.
654 setOperationAction(ISD::FSIN , MVT::f80, Expand);
655 setOperationAction(ISD::FCOS , MVT::f80, Expand);
656 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
657
658 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
659 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
660 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
661 setOperationAction(ISD::FRINT, MVT::f80, Expand);
662 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
663 setOperationAction(ISD::FMA, MVT::f80, Expand);
664 setOperationAction(ISD::LROUND, MVT::f80, Expand);
665 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
666 setOperationAction(ISD::LRINT, MVT::f80, Expand);
667 setOperationAction(ISD::LLRINT, MVT::f80, Expand);
668 }
669
670 // f128 uses xmm registers, but most operations require libcalls.
671 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
672 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
673 : &X86::VR128RegClass);
674
675 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
676
677 setOperationAction(ISD::FADD, MVT::f128, Custom);
678 setOperationAction(ISD::FSUB, MVT::f128, Custom);
679 setOperationAction(ISD::FDIV, MVT::f128, Custom);
680 setOperationAction(ISD::FMUL, MVT::f128, Custom);
681 setOperationAction(ISD::FMA, MVT::f128, Expand);
682
683 setOperationAction(ISD::FABS, MVT::f128, Custom);
684 setOperationAction(ISD::FNEG, MVT::f128, Custom);
685 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
686
687 setOperationAction(ISD::FSIN, MVT::f128, Expand);
688 setOperationAction(ISD::FCOS, MVT::f128, Expand);
689 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
690 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
691
692 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
693 // We need to custom handle any FP_ROUND with an f128 input, but
694 // LegalizeDAG uses the result type to know when to run a custom handler.
695 // So we have to list all legal floating point result types here.
696 if (isTypeLegal(MVT::f32)) {
697 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
698 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
699 }
700 if (isTypeLegal(MVT::f64)) {
701 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
702 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
703 }
704 if (isTypeLegal(MVT::f80)) {
705 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
706 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
707 }
708
709 setOperationAction(ISD::SETCC, MVT::f128, Custom);
710
711 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
712 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
713 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
714 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
715 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
716 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
717 }
718
719 // Always use a library call for pow.
720 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
721 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
722 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
723 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
724
725 setOperationAction(ISD::FLOG, MVT::f80, Expand);
726 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
727 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
728 setOperationAction(ISD::FEXP, MVT::f80, Expand);
729 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
730 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
731 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
732
733 // Some FP actions are always expanded for vector types.
734 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
735 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
736 setOperationAction(ISD::FSIN, VT, Expand);
737 setOperationAction(ISD::FSINCOS, VT, Expand);
738 setOperationAction(ISD::FCOS, VT, Expand);
739 setOperationAction(ISD::FREM, VT, Expand);
740 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
741 setOperationAction(ISD::FPOW, VT, Expand);
742 setOperationAction(ISD::FLOG, VT, Expand);
743 setOperationAction(ISD::FLOG2, VT, Expand);
744 setOperationAction(ISD::FLOG10, VT, Expand);
745 setOperationAction(ISD::FEXP, VT, Expand);
746 setOperationAction(ISD::FEXP2, VT, Expand);
747 }
748
749 // First set operation action for all vector types to either promote
750 // (for widening) or expand (for scalarization). Then we will selectively
751 // turn on ones that can be effectively codegen'd.
752 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
753 setOperationAction(ISD::SDIV, VT, Expand);
754 setOperationAction(ISD::UDIV, VT, Expand);
755 setOperationAction(ISD::SREM, VT, Expand);
756 setOperationAction(ISD::UREM, VT, Expand);
757 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
758 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
759 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
760 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
761 setOperationAction(ISD::FMA, VT, Expand);
762 setOperationAction(ISD::FFLOOR, VT, Expand);
763 setOperationAction(ISD::FCEIL, VT, Expand);
764 setOperationAction(ISD::FTRUNC, VT, Expand);
765 setOperationAction(ISD::FRINT, VT, Expand);
766 setOperationAction(ISD::FNEARBYINT, VT, Expand);
767 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
768 setOperationAction(ISD::MULHS, VT, Expand);
769 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
770 setOperationAction(ISD::MULHU, VT, Expand);
771 setOperationAction(ISD::SDIVREM, VT, Expand);
772 setOperationAction(ISD::UDIVREM, VT, Expand);
773 setOperationAction(ISD::CTPOP, VT, Expand);
774 setOperationAction(ISD::CTTZ, VT, Expand);
775 setOperationAction(ISD::CTLZ, VT, Expand);
776 setOperationAction(ISD::ROTL, VT, Expand);
777 setOperationAction(ISD::ROTR, VT, Expand);
778 setOperationAction(ISD::BSWAP, VT, Expand);
779 setOperationAction(ISD::SETCC, VT, Expand);
780 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
781 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
782 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
783 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
784 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
785 setOperationAction(ISD::TRUNCATE, VT, Expand);
786 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
787 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
788 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
789 setOperationAction(ISD::SELECT_CC, VT, Expand);
790 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
791 setTruncStoreAction(InnerVT, VT, Expand);
792
793 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
794 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
795
796 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
797 // types, we have to deal with them whether we ask for Expansion or not.
798 // Setting Expand causes its own optimisation problems though, so leave
799 // them legal.
800 if (VT.getVectorElementType() == MVT::i1)
801 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
802
803 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
804 // split/scalarized right now.
805 if (VT.getVectorElementType() == MVT::f16)
806 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
807 }
808 }
809
810 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
811 // with -msoft-float, disable use of MMX as well.
812 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
813 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
814 // No operations on x86mmx supported, everything uses intrinsics.
815 }
816
817 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
818 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
819 : &X86::VR128RegClass);
820
821 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
822 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
823 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
824 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
825 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
826 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
827 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
828 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
829 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
830
831 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
832 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
833
834 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Custom);
835 }
836
837 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
838 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
839 : &X86::VR128RegClass);
840
841 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
842 // registers cannot be used even for integer operations.
843 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
844 : &X86::VR128RegClass);
845 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
846 : &X86::VR128RegClass);
847 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
848 : &X86::VR128RegClass);
849 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
850 : &X86::VR128RegClass);
851
852 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
853 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
854 setOperationAction(ISD::SDIV, VT, Custom);
855 setOperationAction(ISD::SREM, VT, Custom);
856 setOperationAction(ISD::UDIV, VT, Custom);
857 setOperationAction(ISD::UREM, VT, Custom);
858 }
859
860 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
861 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
862 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
863
864 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
865 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
866 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
867 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
868 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
869 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
870 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
871 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
872 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
873 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
874 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
875 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
876 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
877
878 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
879 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
880 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
881 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
882 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
883 }
884
885 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
886 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
887 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
888 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
889 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
890 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
891 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
892 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
893 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
894 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
895 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
896 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
897
898 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
899 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
900 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
901
902 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
903 setOperationAction(ISD::SETCC, VT, Custom);
904 setOperationAction(ISD::CTPOP, VT, Custom);
905 setOperationAction(ISD::ABS, VT, Custom);
906
907 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
908 // setcc all the way to isel and prefer SETGT in some isel patterns.
909 setCondCodeAction(ISD::SETLT, VT, Custom);
910 setCondCodeAction(ISD::SETLE, VT, Custom);
911 }
912
913 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
914 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
915 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
916 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
917 setOperationAction(ISD::VSELECT, VT, Custom);
918 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
919 }
920
921 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
922 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
923 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
924 setOperationAction(ISD::VSELECT, VT, Custom);
925
926 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
927 continue;
928
929 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
930 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
931 }
932
933 // Custom lower v2i64 and v2f64 selects.
934 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
935 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
936 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
937 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
938 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
939
940 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
941 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
942
943 // Custom legalize these to avoid over promotion or custom promotion.
944 setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
945 setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
946 setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
947 setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
948 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
949 setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
950 setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
951 setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
952 setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
953 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
954
955 // Marking FP_TO_SINT v8i16 as Custom will trick type legalization into
956 // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
957 // split again based on the input type, this will cause an AssertSExt i16 to
958 // be emitted instead of an AssertZExt. This will allow packssdw followed by
959 // packuswb to be used to truncate to v8i8. This is necessary since packusdw
960 // isn't available until sse4.1.
961 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
962
963 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
964 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
965
966 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
967
968 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
969 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
970
971 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
972 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
973
974 // We want to legalize this to an f64 load rather than an i64 load on
975 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
976 // store.
977 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
978 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
979 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
980 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
981 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
982 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
983
984 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
985 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
986 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
987 if (!Subtarget.hasAVX512())
988 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
989
990 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
991 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
992 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
993
994 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
995
996 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
997 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
998 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
999 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1000 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1001 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1002
1003 // In the customized shift lowering, the legal v4i32/v2i64 cases
1004 // in AVX2 will be recognized.
1005 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1006 setOperationAction(ISD::SRL, VT, Custom);
1007 setOperationAction(ISD::SHL, VT, Custom);
1008 setOperationAction(ISD::SRA, VT, Custom);
1009 }
1010
1011 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1012 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1013
1014 // With AVX512, expanding (and promoting the shifts) is better.
1015 if (!Subtarget.hasAVX512())
1016 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1017 }
1018
1019 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1020 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1021 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1022 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1023 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1024 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1025 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1026 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1027 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1028
1029 // These might be better off as horizontal vector ops.
1030 setOperationAction(ISD::ADD, MVT::i16, Custom);
1031 setOperationAction(ISD::ADD, MVT::i32, Custom);
1032 setOperationAction(ISD::SUB, MVT::i16, Custom);
1033 setOperationAction(ISD::SUB, MVT::i32, Custom);
1034 }
1035
1036 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1037 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1038 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1039 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1040 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1041 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1042 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1043 }
1044
1045 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1046 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1047 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1048 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1049 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1050 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1051 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1052 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1053
1054 // FIXME: Do we need to handle scalar-to-vector here?
1055 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1056
1057 // We directly match byte blends in the backend as they match the VSELECT
1058 // condition form.
1059 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1060
1061 // SSE41 brings specific instructions for doing vector sign extend even in
1062 // cases where we don't have SRA.
1063 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1064 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1065 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1066 }
1067
1068 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1069 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1070 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1071 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1072 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1073 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1074 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1075 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1076 }
1077
1078 // i8 vectors are custom because the source register and the source
1079 // memory operand types are not the same width.
1080 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1081 }
1082
1083 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1084 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1085 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1086 setOperationAction(ISD::ROTL, VT, Custom);
1087
1088 // XOP can efficiently perform BITREVERSE with VPPERM.
1089 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1090 setOperationAction(ISD::BITREVERSE, VT, Custom);
1091
1092 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1093 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1094 setOperationAction(ISD::BITREVERSE, VT, Custom);
1095 }
1096
1097 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1098 bool HasInt256 = Subtarget.hasInt256();
1099
1100 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1101 : &X86::VR256RegClass);
1102 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1103 : &X86::VR256RegClass);
1104 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1105 : &X86::VR256RegClass);
1106 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1107 : &X86::VR256RegClass);
1108 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1109 : &X86::VR256RegClass);
1110 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1111 : &X86::VR256RegClass);
1112
1113 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1114 setOperationAction(ISD::FFLOOR, VT, Legal);
1115 setOperationAction(ISD::FCEIL, VT, Legal);
1116 setOperationAction(ISD::FTRUNC, VT, Legal);
1117 setOperationAction(ISD::FRINT, VT, Legal);
1118 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1119 setOperationAction(ISD::FNEG, VT, Custom);
1120 setOperationAction(ISD::FABS, VT, Custom);
1121 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1122 }
1123
1124 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1125 // even though v8i16 is a legal type.
1126 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1127 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1128 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1129
1130 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1131
1132 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Custom);
1133
1134 if (!Subtarget.hasAVX512())
1135 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1136
1137 // In the customized shift lowering, the legal v8i32/v4i64 cases
1138 // in AVX2 will be recognized.
1139 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1140 setOperationAction(ISD::SRL, VT, Custom);
1141 setOperationAction(ISD::SHL, VT, Custom);
1142 setOperationAction(ISD::SRA, VT, Custom);
1143 }
1144
1145 // These types need custom splitting if their input is a 128-bit vector.
1146 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1147 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1148 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1149 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1150
1151 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1152 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1153
1154 // With BWI, expanding (and promoting the shifts) is better.
1155 if (!Subtarget.hasBWI())
1156 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1157
1158 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1159 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1160 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1161 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1162 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1163 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1164
1165 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1166 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1167 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1168 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1169 }
1170
1171 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1172 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1173 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1174 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1175
1176 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1177 setOperationAction(ISD::SETCC, VT, Custom);
1178 setOperationAction(ISD::CTPOP, VT, Custom);
1179 setOperationAction(ISD::CTLZ, VT, Custom);
1180
1181 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1182 // setcc all the way to isel and prefer SETGT in some isel patterns.
1183 setCondCodeAction(ISD::SETLT, VT, Custom);
1184 setCondCodeAction(ISD::SETLE, VT, Custom);
1185 }
1186
1187 if (Subtarget.hasAnyFMA()) {
1188 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1189 MVT::v2f64, MVT::v4f64 })
1190 setOperationAction(ISD::FMA, VT, Legal);
1191 }
1192
1193 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1194 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1195 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1196 }
1197
1198 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1199 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1200 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1201 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1202
1203 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1204 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1205 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1206 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1207 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1208 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1209
1210 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1211 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1212 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1213 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1214 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1215
1216 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1217 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1218 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1219 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1220 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1221 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1222 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1223 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1224
1225 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1226 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1227 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1228 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1229 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1230 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1231 }
1232
1233 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1234 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1235 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1236 }
1237
1238 if (HasInt256) {
1239 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1240 // when we have a 256bit-wide blend with immediate.
1241 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1242
1243 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1244 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1245 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1246 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1247 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1248 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1249 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1250 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1251 }
1252 }
1253
1254 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1255 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1256 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1257 setOperationAction(ISD::MSTORE, VT, Legal);
1258 }
1259
1260 // Extract subvector is special because the value type
1261 // (result) is 128-bit but the source is 256-bit wide.
1262 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1263 MVT::v4f32, MVT::v2f64 }) {
1264 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1265 }
1266
1267 // Custom lower several nodes for 256-bit types.
1268 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1269 MVT::v8f32, MVT::v4f64 }) {
1270 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1271 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1272 setOperationAction(ISD::VSELECT, VT, Custom);
1273 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1274 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1275 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1276 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1277 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1278 setOperationAction(ISD::STORE, VT, Custom);
1279 }
1280
1281 if (HasInt256) {
1282 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1283
1284 // Custom legalize 2x32 to get a little better code.
1285 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1286 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1287
1288 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1289 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1290 setOperationAction(ISD::MGATHER, VT, Custom);
1291 }
1292 }
1293
1294 // This block controls legalization of the mask vector sizes that are
1295 // available with AVX512. 512-bit vectors are in a separate block controlled
1296 // by useAVX512Regs.
1297 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1298 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1299 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1300 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1301 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1302 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1303
1304 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1305 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1306 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1307
1308 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1309 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1310 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1311 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1312 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1313 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1314
1315 // There is no byte sized k-register load or store without AVX512DQ.
1316 if (!Subtarget.hasDQI()) {
1317 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1318 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1319 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1320 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1321
1322 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1323 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1324 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1325 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1326 }
1327
1328 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1329 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1330 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1331 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1332 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1333 }
1334
1335 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1336 setOperationAction(ISD::ADD, VT, Custom);
1337 setOperationAction(ISD::SUB, VT, Custom);
1338 setOperationAction(ISD::MUL, VT, Custom);
1339 setOperationAction(ISD::SETCC, VT, Custom);
1340 setOperationAction(ISD::SELECT, VT, Custom);
1341 setOperationAction(ISD::TRUNCATE, VT, Custom);
1342 setOperationAction(ISD::UADDSAT, VT, Custom);
1343 setOperationAction(ISD::SADDSAT, VT, Custom);
1344 setOperationAction(ISD::USUBSAT, VT, Custom);
1345 setOperationAction(ISD::SSUBSAT, VT, Custom);
1346
1347 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1348 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1349 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1350 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1351 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1352 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1353 setOperationAction(ISD::VSELECT, VT, Expand);
1354 }
1355
1356 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1357 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1358 }
1359
1360 // This block controls legalization for 512-bit operations with 32/64 bit
1361 // elements. 512-bits can be disabled based on prefer-vector-width and
1362 // required-vector-width function attributes.
1363 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1364 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1365 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1366 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1367 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1368
1369 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1370 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1371 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1372 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1373 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1374 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1375 }
1376
1377 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1378 setOperationAction(ISD::FNEG, VT, Custom);
1379 setOperationAction(ISD::FABS, VT, Custom);
1380 setOperationAction(ISD::FMA, VT, Legal);
1381 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1382 }
1383
1384 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1385 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
1386 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
1387 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
1388 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1389 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
1390 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
1391 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
1392 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1393 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1394
1395 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Custom);
1396
1397 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1398 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1399 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1400 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1401 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1402
1403 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1404 // to 512-bit rather than use the AVX2 instructions so that we can use
1405 // k-masks.
1406 if (!Subtarget.hasVLX()) {
1407 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1408 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1409 setOperationAction(ISD::MLOAD, VT, Custom);
1410 setOperationAction(ISD::MSTORE, VT, Custom);
1411 }
1412 }
1413
1414 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1415 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1416 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1417 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1418 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1419 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1420 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1421 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1422
1423 // Need to custom widen this if we don't have AVX512BW.
1424 setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
1425 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
1426 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
1427
1428 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1429 setOperationAction(ISD::FFLOOR, VT, Legal);
1430 setOperationAction(ISD::FCEIL, VT, Legal);
1431 setOperationAction(ISD::FTRUNC, VT, Legal);
1432 setOperationAction(ISD::FRINT, VT, Legal);
1433 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1434
1435 setOperationAction(ISD::SELECT, VT, Custom);
1436 }
1437
1438 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1439 for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
1440 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1441 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1442 }
1443
1444 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1445 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1446 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1447 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1448
1449 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1450 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1451
1452 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1453 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1454
1455 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1456 setOperationAction(ISD::SMAX, VT, Legal);
1457 setOperationAction(ISD::UMAX, VT, Legal);
1458 setOperationAction(ISD::SMIN, VT, Legal);
1459 setOperationAction(ISD::UMIN, VT, Legal);
1460 setOperationAction(ISD::ABS, VT, Legal);
1461 setOperationAction(ISD::SRL, VT, Custom);
1462 setOperationAction(ISD::SHL, VT, Custom);
1463 setOperationAction(ISD::SRA, VT, Custom);
1464 setOperationAction(ISD::CTPOP, VT, Custom);
1465 setOperationAction(ISD::ROTL, VT, Custom);
1466 setOperationAction(ISD::ROTR, VT, Custom);
1467 setOperationAction(ISD::SETCC, VT, Custom);
1468 setOperationAction(ISD::SELECT, VT, Custom);
1469
1470 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1471 // setcc all the way to isel and prefer SETGT in some isel patterns.
1472 setCondCodeAction(ISD::SETLT, VT, Custom);
1473 setCondCodeAction(ISD::SETLE, VT, Custom);
1474 }
1475
1476 if (Subtarget.hasDQI()) {
1477 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1478 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1479 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1480 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1481
1482 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1483 }
1484
1485 if (Subtarget.hasCDI()) {
1486 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1487 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1488 setOperationAction(ISD::CTLZ, VT, Legal);
1489 }
1490 } // Subtarget.hasCDI()
1491
1492 if (Subtarget.hasVPOPCNTDQ()) {
1493 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1494 setOperationAction(ISD::CTPOP, VT, Legal);
1495 }
1496
1497 // Extract subvector is special because the value type
1498 // (result) is 256-bit but the source is 512-bit wide.
1499 // 128-bit was made Legal under AVX1.
1500 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1501 MVT::v8f32, MVT::v4f64 })
1502 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1503
1504 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1505 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1506 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1507 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1508 setOperationAction(ISD::VSELECT, VT, Custom);
1509 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1510 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1511 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1512 setOperationAction(ISD::MLOAD, VT, Legal);
1513 setOperationAction(ISD::MSTORE, VT, Legal);
1514 setOperationAction(ISD::MGATHER, VT, Custom);
1515 setOperationAction(ISD::MSCATTER, VT, Custom);
1516 }
1517 if (!Subtarget.hasBWI()) {
1518 // Need to custom split v32i16/v64i8 bitcasts.
1519 setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
1520 setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
1521
1522 // Better to split these into two 256-bit ops.
1523 setOperationAction(ISD::BITREVERSE, MVT::v8i64, Custom);
1524 setOperationAction(ISD::BITREVERSE, MVT::v16i32, Custom);
1525 }
1526
1527 if (Subtarget.hasVBMI2()) {
1528 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1529 setOperationAction(ISD::FSHL, VT, Custom);
1530 setOperationAction(ISD::FSHR, VT, Custom);
1531 }
1532 }
1533 }// has AVX-512
1534
1535 // This block controls legalization for operations that don't have
1536 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1537 // narrower widths.
1538 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1539 // These operations are handled on non-VLX by artificially widening in
1540 // isel patterns.
1541 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1542
1543 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1544 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1545 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1546 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1547 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1548
1549 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1550 setOperationAction(ISD::SMAX, VT, Legal);
1551 setOperationAction(ISD::UMAX, VT, Legal);
1552 setOperationAction(ISD::SMIN, VT, Legal);
1553 setOperationAction(ISD::UMIN, VT, Legal);
1554 setOperationAction(ISD::ABS, VT, Legal);
1555 }
1556
1557 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1558 setOperationAction(ISD::ROTL, VT, Custom);
1559 setOperationAction(ISD::ROTR, VT, Custom);
1560 }
1561
1562 // Custom legalize 2x32 to get a little better code.
1563 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1564 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1565
1566 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1567 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1568 setOperationAction(ISD::MSCATTER, VT, Custom);
1569
1570 if (Subtarget.hasDQI()) {
1571 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1572 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1573 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1574 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1575 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1576
1577 setOperationAction(ISD::MUL, VT, Legal);
1578 }
1579 }
1580
1581 if (Subtarget.hasCDI()) {
1582 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1583 setOperationAction(ISD::CTLZ, VT, Legal);
1584 }
1585 } // Subtarget.hasCDI()
1586
1587 if (Subtarget.hasVPOPCNTDQ()) {
1588 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1589 setOperationAction(ISD::CTPOP, VT, Legal);
1590 }
1591 }
1592
1593 // This block controls legalization of v32i1/v64i1 which are available with
1594 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1595 // useBWIRegs.
1596 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1597 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1598 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1599
1600 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1601 setOperationAction(ISD::ADD, VT, Custom);
1602 setOperationAction(ISD::SUB, VT, Custom);
1603 setOperationAction(ISD::MUL, VT, Custom);
1604 setOperationAction(ISD::VSELECT, VT, Expand);
1605 setOperationAction(ISD::UADDSAT, VT, Custom);
1606 setOperationAction(ISD::SADDSAT, VT, Custom);
1607 setOperationAction(ISD::USUBSAT, VT, Custom);
1608 setOperationAction(ISD::SSUBSAT, VT, Custom);
1609
1610 setOperationAction(ISD::TRUNCATE, VT, Custom);
1611 setOperationAction(ISD::SETCC, VT, Custom);
1612 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1613 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1614 setOperationAction(ISD::SELECT, VT, Custom);
1615 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1616 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1617 }
1618
1619 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1620 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1621 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1622 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1623 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1624 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1625
1626 // Extends from v32i1 masks to 256-bit vectors.
1627 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1628 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1629 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1630 }
1631
1632 // This block controls legalization for v32i16 and v64i8. 512-bits can be
1633 // disabled based on prefer-vector-width and required-vector-width function
1634 // attributes.
1635 if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1636 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1637 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1638
1639 // Extends from v64i1 masks to 512-bit vectors.
1640 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1641 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1642 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1643
1644 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1645 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1646 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1647 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1648 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1649 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1650 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1651 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1652 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1653 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1654 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1655 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1656 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1657 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1658 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1659 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1660 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1661 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1662 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1663 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1664 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1665 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1666 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1667
1668 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1669 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1670
1671 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1672
1673 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1674 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1675 setOperationAction(ISD::VSELECT, VT, Custom);
1676 setOperationAction(ISD::ABS, VT, Legal);
1677 setOperationAction(ISD::SRL, VT, Custom);
1678 setOperationAction(ISD::SHL, VT, Custom);
1679 setOperationAction(ISD::SRA, VT, Custom);
1680 setOperationAction(ISD::MLOAD, VT, Legal);
1681 setOperationAction(ISD::MSTORE, VT, Legal);
1682 setOperationAction(ISD::CTPOP, VT, Custom);
1683 setOperationAction(ISD::CTLZ, VT, Custom);
1684 setOperationAction(ISD::SMAX, VT, Legal);
1685 setOperationAction(ISD::UMAX, VT, Legal);
1686 setOperationAction(ISD::SMIN, VT, Legal);
1687 setOperationAction(ISD::UMIN, VT, Legal);
1688 setOperationAction(ISD::SETCC, VT, Custom);
1689 setOperationAction(ISD::UADDSAT, VT, Legal);
1690 setOperationAction(ISD::SADDSAT, VT, Legal);
1691 setOperationAction(ISD::USUBSAT, VT, Legal);
1692 setOperationAction(ISD::SSUBSAT, VT, Legal);
1693 setOperationAction(ISD::SELECT, VT, Custom);
1694
1695 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1696 // setcc all the way to isel and prefer SETGT in some isel patterns.
1697 setCondCodeAction(ISD::SETLT, VT, Custom);
1698 setCondCodeAction(ISD::SETLE, VT, Custom);
1699 }
1700
1701 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1702 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1703 }
1704
1705 if (Subtarget.hasBITALG()) {
1706 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1707 setOperationAction(ISD::CTPOP, VT, Legal);
1708 }
1709
1710 if (Subtarget.hasVBMI2()) {
1711 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1712 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1713 }
1714 }
1715
1716 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1717 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1718 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1719 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1720 }
1721
1722 // These operations are handled on non-VLX by artificially widening in
1723 // isel patterns.
1724 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1725
1726 if (Subtarget.hasBITALG()) {
1727 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1728 setOperationAction(ISD::CTPOP, VT, Legal);
1729 }
1730 }
1731
1732 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1733 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1734 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1735 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1736 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1737 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1738
1739 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1740 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1741 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1742 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1743 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1744
1745 if (Subtarget.hasDQI()) {
1746 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1747 // v2f32 UINT_TO_FP is already custom under SSE2.
1748 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1749 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1750, __PRETTY_FUNCTION__))
1750 "Unexpected operation action!")((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1750, __PRETTY_FUNCTION__))
;
1751 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1752 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1753 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1754 }
1755
1756 if (Subtarget.hasBWI()) {
1757 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1758 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1759 }
1760
1761 if (Subtarget.hasVBMI2()) {
1762 // TODO: Make these legal even without VLX?
1763 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1764 MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1765 setOperationAction(ISD::FSHL, VT, Custom);
1766 setOperationAction(ISD::FSHR, VT, Custom);
1767 }
1768 }
1769
1770 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
1771 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
1772 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1773 }
1774
1775 // We want to custom lower some of our intrinsics.
1776 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1777 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1778 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1779 if (!Subtarget.is64Bit()) {
1780 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1781 }
1782
1783 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1784 // handle type legalization for these operations here.
1785 //
1786 // FIXME: We really should do custom legalization for addition and
1787 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1788 // than generic legalization for 64-bit multiplication-with-overflow, though.
1789 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1790 if (VT == MVT::i64 && !Subtarget.is64Bit())
1791 continue;
1792 // Add/Sub/Mul with overflow operations are custom lowered.
1793 setOperationAction(ISD::SADDO, VT, Custom);
1794 setOperationAction(ISD::UADDO, VT, Custom);
1795 setOperationAction(ISD::SSUBO, VT, Custom);
1796 setOperationAction(ISD::USUBO, VT, Custom);
1797 setOperationAction(ISD::SMULO, VT, Custom);
1798 setOperationAction(ISD::UMULO, VT, Custom);
1799
1800 // Support carry in as value rather than glue.
1801 setOperationAction(ISD::ADDCARRY, VT, Custom);
1802 setOperationAction(ISD::SUBCARRY, VT, Custom);
1803 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1804 }
1805
1806 if (!Subtarget.is64Bit()) {
1807 // These libcalls are not available in 32-bit.
1808 setLibcallName(RTLIB::SHL_I128, nullptr);
1809 setLibcallName(RTLIB::SRL_I128, nullptr);
1810 setLibcallName(RTLIB::SRA_I128, nullptr);
1811 setLibcallName(RTLIB::MUL_I128, nullptr);
1812 }
1813
1814 // Combine sin / cos into _sincos_stret if it is available.
1815 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1816 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1817 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1818 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1819 }
1820
1821 if (Subtarget.isTargetWin64()) {
1822 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1823 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1824 setOperationAction(ISD::SREM, MVT::i128, Custom);
1825 setOperationAction(ISD::UREM, MVT::i128, Custom);
1826 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1827 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1828 }
1829
1830 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1831 // is. We should promote the value to 64-bits to solve this.
1832 // This is what the CRT headers do - `fmodf` is an inline header
1833 // function casting to f64 and calling `fmod`.
1834 if (Subtarget.is32Bit() &&
1835 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
1836 for (ISD::NodeType Op :
1837 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1838 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1839 if (isOperationExpand(Op, MVT::f32))
1840 setOperationAction(Op, MVT::f32, Promote);
1841
1842 // We have target-specific dag combine patterns for the following nodes:
1843 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1844 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1845 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1846 setTargetDAGCombine(ISD::CONCAT_VECTORS);
1847 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1848 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1849 setTargetDAGCombine(ISD::BITCAST);
1850 setTargetDAGCombine(ISD::VSELECT);
1851 setTargetDAGCombine(ISD::SELECT);
1852 setTargetDAGCombine(ISD::SHL);
1853 setTargetDAGCombine(ISD::SRA);
1854 setTargetDAGCombine(ISD::SRL);
1855 setTargetDAGCombine(ISD::OR);
1856 setTargetDAGCombine(ISD::AND);
1857 setTargetDAGCombine(ISD::ADD);
1858 setTargetDAGCombine(ISD::FADD);
1859 setTargetDAGCombine(ISD::FSUB);
1860 setTargetDAGCombine(ISD::FNEG);
1861 setTargetDAGCombine(ISD::FMA);
1862 setTargetDAGCombine(ISD::FMINNUM);
1863 setTargetDAGCombine(ISD::FMAXNUM);
1864 setTargetDAGCombine(ISD::SUB);
1865 setTargetDAGCombine(ISD::LOAD);
1866 setTargetDAGCombine(ISD::MLOAD);
1867 setTargetDAGCombine(ISD::STORE);
1868 setTargetDAGCombine(ISD::MSTORE);
1869 setTargetDAGCombine(ISD::TRUNCATE);
1870 setTargetDAGCombine(ISD::ZERO_EXTEND);
1871 setTargetDAGCombine(ISD::ANY_EXTEND);
1872 setTargetDAGCombine(ISD::SIGN_EXTEND);
1873 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1874 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
1875 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1876 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1877 setTargetDAGCombine(ISD::SINT_TO_FP);
1878 setTargetDAGCombine(ISD::UINT_TO_FP);
1879 setTargetDAGCombine(ISD::SETCC);
1880 setTargetDAGCombine(ISD::MUL);
1881 setTargetDAGCombine(ISD::XOR);
1882 setTargetDAGCombine(ISD::MSCATTER);
1883 setTargetDAGCombine(ISD::MGATHER);
1884
1885 computeRegisterProperties(Subtarget.getRegisterInfo());
1886
1887 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1888 MaxStoresPerMemsetOptSize = 8;
1889 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1890 MaxStoresPerMemcpyOptSize = 4;
1891 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1892 MaxStoresPerMemmoveOptSize = 4;
1893
1894 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1895 // that needs to be benchmarked and balanced with the potential use of vector
1896 // load/store types (PR33329, PR33914).
1897 MaxLoadsPerMemcmp = 2;
1898 MaxLoadsPerMemcmpOptSize = 2;
1899
1900 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1901 setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
1902
1903 // An out-of-order CPU can speculatively execute past a predictable branch,
1904 // but a conditional move could be stalled by an expensive earlier operation.
1905 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1906 EnableExtLdPromotion = true;
1907 setPrefFunctionAlignment(Align(16));
1908
1909 verifyIntrinsicTables();
1910}
1911
1912// This has so far only been implemented for 64-bit MachO.
1913bool X86TargetLowering::useLoadStackGuardNode() const {
1914 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1915}
1916
1917bool X86TargetLowering::useStackGuardXorFP() const {
1918 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
1919 return Subtarget.getTargetTriple().isOSMSVCRT();
1920}
1921
1922SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1923 const SDLoc &DL) const {
1924 EVT PtrTy = getPointerTy(DAG.getDataLayout());
1925 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
1926 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
1927 return SDValue(Node, 0);
1928}
1929
1930TargetLoweringBase::LegalizeTypeAction
1931X86TargetLowering::getPreferredVectorAction(MVT VT) const {
1932 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1933 return TypeSplitVector;
1934
1935 if (VT.getVectorNumElements() != 1 &&
1936 VT.getVectorElementType() != MVT::i1)
1937 return TypeWidenVector;
1938
1939 return TargetLoweringBase::getPreferredVectorAction(VT);
1940}
1941
1942MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1943 CallingConv::ID CC,
1944 EVT VT) const {
1945 // v32i1 vectors should be promoted to v32i8 to match avx2.
1946 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1947 return MVT::v32i8;
1948 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
1949 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
1950 Subtarget.hasAVX512() &&
1951 (!isPowerOf2_32(VT.getVectorNumElements()) ||
1952 (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
1953 (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
1954 return MVT::i8;
1955 // FIXME: Should we just make these types legal and custom split operations?
1956 if ((VT == MVT::v32i16 || VT == MVT::v64i8) &&
1957 Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI)
1958 return MVT::v16i32;
1959 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1960}
1961
1962unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1963 CallingConv::ID CC,
1964 EVT VT) const {
1965 // v32i1 vectors should be promoted to v32i8 to match avx2.
1966 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1967 return 1;
1968 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
1969 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
1970 Subtarget.hasAVX512() &&
1971 (!isPowerOf2_32(VT.getVectorNumElements()) ||
1972 (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
1973 (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
1974 return VT.getVectorNumElements();
1975 // FIXME: Should we just make these types legal and custom split operations?
1976 if ((VT == MVT::v32i16 || VT == MVT::v64i8) &&
1977 Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI)
1978 return 1;
1979 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1980}
1981
1982unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
1983 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1984 unsigned &NumIntermediates, MVT &RegisterVT) const {
1985 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
1986 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
1987 Subtarget.hasAVX512() &&
1988 (!isPowerOf2_32(VT.getVectorNumElements()) ||
1989 (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
1990 (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
1991 RegisterVT = MVT::i8;
1992 IntermediateVT = MVT::i1;
1993 NumIntermediates = VT.getVectorNumElements();
1994 return NumIntermediates;
1995 }
1996
1997 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
1998 NumIntermediates, RegisterVT);
1999}
2000
2001EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2002 LLVMContext& Context,
2003 EVT VT) const {
2004 if (!VT.isVector())
2005 return MVT::i8;
2006
2007 if (Subtarget.hasAVX512()) {
2008 const unsigned NumElts = VT.getVectorNumElements();
2009
2010 // Figure out what this type will be legalized to.
2011 EVT LegalVT = VT;
2012 while (getTypeAction(Context, LegalVT) != TypeLegal)
2013 LegalVT = getTypeToTransformTo(Context, LegalVT);
2014
2015 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2016 if (LegalVT.getSimpleVT().is512BitVector())
2017 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2018
2019 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2020 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2021 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2022 // vXi16/vXi8.
2023 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2024 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2025 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2026 }
2027 }
2028
2029 return VT.changeVectorElementTypeToInteger();
2030}
2031
2032/// Helper for getByValTypeAlignment to determine
2033/// the desired ByVal argument alignment.
2034static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
2035 if (MaxAlign == 16)
2036 return;
2037 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2038 if (VTy->getBitWidth() == 128)
2039 MaxAlign = 16;
2040 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2041 unsigned EltAlign = 0;
2042 getMaxByValAlign(ATy->getElementType(), EltAlign);
2043 if (EltAlign > MaxAlign)
2044 MaxAlign = EltAlign;
2045 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2046 for (auto *EltTy : STy->elements()) {
2047 unsigned EltAlign = 0;
2048 getMaxByValAlign(EltTy, EltAlign);
2049 if (EltAlign > MaxAlign)
2050 MaxAlign = EltAlign;
2051 if (MaxAlign == 16)
2052 break;
2053 }
2054 }
2055}
2056
2057/// Return the desired alignment for ByVal aggregate
2058/// function arguments in the caller parameter area. For X86, aggregates
2059/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2060/// are at 4-byte boundaries.
2061unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2062 const DataLayout &DL) const {
2063 if (Subtarget.is64Bit()) {
2064 // Max of 8 and alignment of type.
2065 unsigned TyAlign = DL.getABITypeAlignment(Ty);
2066 if (TyAlign > 8)
2067 return TyAlign;
2068 return 8;
2069 }
2070
2071 unsigned Align = 4;
2072 if (Subtarget.hasSSE1())
2073 getMaxByValAlign(Ty, Align);
2074 return Align;
2075}
2076
2077/// Returns the target specific optimal type for load
2078/// and store operations as a result of memset, memcpy, and memmove
2079/// lowering. If DstAlign is zero that means the destination
2080/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
2081/// means there isn't a need to check it against alignment requirement,
2082/// probably because the source does not need to be loaded. If 'IsMemset' is
2083/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
2084/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
2085/// source is constant so it does not need to be loaded.
2086/// It returns EVT::Other if the type should be determined using generic
2087/// target-independent logic.
2088/// For vector ops we check that the overall size isn't larger than our
2089/// preferred vector width.
2090EVT X86TargetLowering::getOptimalMemOpType(
2091 uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
2092 bool ZeroMemset, bool MemcpyStrSrc,
2093 const AttributeList &FuncAttributes) const {
2094 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2095 if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
2096 ((DstAlign == 0 || DstAlign >= 16) &&
2097 (SrcAlign == 0 || SrcAlign >= 16)))) {
2098 // FIXME: Check if unaligned 64-byte accesses are slow.
2099 if (Size >= 64 && Subtarget.hasAVX512() &&
2100 (Subtarget.getPreferVectorWidth() >= 512)) {
2101 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2102 }
2103 // FIXME: Check if unaligned 32-byte accesses are slow.
2104 if (Size >= 32 && Subtarget.hasAVX() &&
2105 (Subtarget.getPreferVectorWidth() >= 256)) {
2106 // Although this isn't a well-supported type for AVX1, we'll let
2107 // legalization and shuffle lowering produce the optimal codegen. If we
2108 // choose an optimal type with a vector element larger than a byte,
2109 // getMemsetStores() may create an intermediate splat (using an integer
2110 // multiply) before we splat as a vector.
2111 return MVT::v32i8;
2112 }
2113 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2114 return MVT::v16i8;
2115 // TODO: Can SSE1 handle a byte vector?
2116 // If we have SSE1 registers we should be able to use them.
2117 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2118 (Subtarget.getPreferVectorWidth() >= 128))
2119 return MVT::v4f32;
2120 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
2121 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2122 // Do not use f64 to lower memcpy if source is string constant. It's
2123 // better to use i32 to avoid the loads.
2124 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2125 // The gymnastics of splatting a byte value into an XMM register and then
2126 // only using 8-byte stores (because this is a CPU with slow unaligned
2127 // 16-byte accesses) makes that a loser.
2128 return MVT::f64;
2129 }
2130 }
2131 // This is a compromise. If we reach here, unaligned accesses may be slow on
2132 // this target. However, creating smaller, aligned accesses could be even
2133 // slower and would certainly be a lot more code.
2134 if (Subtarget.is64Bit() && Size >= 8)
2135 return MVT::i64;
2136 return MVT::i32;
2137}
2138
2139bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2140 if (VT == MVT::f32)
2141 return X86ScalarSSEf32;
2142 else if (VT == MVT::f64)
2143 return X86ScalarSSEf64;
2144 return true;
2145}
2146
2147bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2148 EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags,
2149 bool *Fast) const {
2150 if (Fast) {
2151 switch (VT.getSizeInBits()) {
2152 default:
2153 // 8-byte and under are always assumed to be fast.
2154 *Fast = true;
2155 break;
2156 case 128:
2157 *Fast = !Subtarget.isUnalignedMem16Slow();
2158 break;
2159 case 256:
2160 *Fast = !Subtarget.isUnalignedMem32Slow();
2161 break;
2162 // TODO: What about AVX-512 (512-bit) accesses?
2163 }
2164 }
2165 // NonTemporal vector memory ops must be aligned.
2166 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2167 // NT loads can only be vector aligned, so if its less aligned than the
2168 // minimum vector size (which we can split the vector down to), we might as
2169 // well use a regular unaligned vector load.
2170 // We don't have any NT loads pre-SSE41.
2171 if (!!(Flags & MachineMemOperand::MOLoad))
2172 return (Align < 16 || !Subtarget.hasSSE41());
2173 return false;
2174 }
2175 // Misaligned accesses of any size are always allowed.
2176 return true;
2177}
2178
2179/// Return the entry encoding for a jump table in the
2180/// current function. The returned value is a member of the
2181/// MachineJumpTableInfo::JTEntryKind enum.
2182unsigned X86TargetLowering::getJumpTableEncoding() const {
2183 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2184 // symbol.
2185 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2186 return MachineJumpTableInfo::EK_Custom32;
2187
2188 // Otherwise, use the normal jump table encoding heuristics.
2189 return TargetLowering::getJumpTableEncoding();
2190}
2191
/// Forwards the subtarget's useSoftFloat() answer unchanged.
2192bool X86TargetLowering::useSoftFloat() const {
2193 return Subtarget.useSoftFloat();
2194}
2195
2196void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2197 ArgListTy &Args) const {
2198
2199 // Only relabel X86-32 for C / Stdcall CCs.
2200 if (Subtarget.is64Bit())
2201 return;
2202 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2203 return;
2204 unsigned ParamRegs = 0;
2205 if (auto *M = MF->getFunction().getParent())
2206 ParamRegs = M->getNumberRegisterParameters();
2207
2208 // Mark the first N int arguments as having reg
2209 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2210 Type *T = Args[Idx].Ty;
2211 if (T->isIntOrPtrTy())
2212 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2213 unsigned numRegs = 1;
2214 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2215 numRegs = 2;
2216 if (ParamRegs < numRegs)
2217 return;
2218 ParamRegs -= numRegs;
2219 Args[Idx].IsInReg = true;
2220 }
2221 }
2222}
2223
/// Builds the expression for one custom-encoded jump table entry: a reference
/// to the symbol of \p MBB with the VK_GOTOFF variant kind, created in \p Ctx.
/// Only valid for position-independent code using the GOT PIC style, which is
/// asserted. \p MJTI and \p uid are not used by this implementation.
2224const MCExpr *
2225X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2226 const MachineBasicBlock *MBB,
2227 unsigned uid,MCContext &Ctx) const{
2228 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2228, __PRETTY_FUNCTION__))
;
2229 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2230 // entries.
2231 return MCSymbolRefExpr::create(MBB->getSymbol(),
2232 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2233}
2234
2235/// Returns relocation base for the given PIC jumptable.
2236SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2237 SelectionDAG &DAG) const {
2238 if (!Subtarget.is64Bit())
2239 // This doesn't have SDLoc associated with it, but is not really the
2240 // same as a Register.
2241 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2242 getPointerTy(DAG.getDataLayout()));
2243 return Table;
2244}
2245
2246/// This returns the relocation base for the given PIC jumptable,
2247/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2248const MCExpr *X86TargetLowering::
2249getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2250 MCContext &Ctx) const {
2251 // X86-64 uses RIP relative addressing based on the jump table label.
2252 if (Subtarget.isPICStyleRIPRel())
2253 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2254
2255 // Otherwise, the reference is relative to the PIC base.
2256 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2257}
2258
2259std::pair<const TargetRegisterClass *, uint8_t>
2260X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2261 MVT VT) const {
2262 const TargetRegisterClass *RRC = nullptr;
2263 uint8_t Cost = 1;
2264 switch (VT.SimpleTy) {
2265 default:
2266 return TargetLowering::findRepresentativeClass(TRI, VT);
2267 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2268 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2269 break;
2270 case MVT::x86mmx:
2271 RRC = &X86::VR64RegClass;
2272 break;
2273 case MVT::f32: case MVT::f64:
2274 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2275 case MVT::v4f32: case MVT::v2f64:
2276 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2277 case MVT::v8f32: case MVT::v4f64:
2278 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2279 case MVT::v16f32: case MVT::v8f64:
2280 RRC = &X86::VR128XRegClass;
2281 break;
2282 }
2283 return std::make_pair(RRC, Cost);
2284}
2285
2286unsigned X86TargetLowering::getAddressSpace() const {
2287 if (Subtarget.is64Bit())
2288 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2289 return 256;
2290}
2291
2292static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2293 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2294 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2295}
2296
2297static Constant* SegmentOffset(IRBuilder<> &IRB,
2298 unsigned Offset, unsigned AddressSpace) {
2299 return ConstantExpr::getIntToPtr(
2300 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2301 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2302}
2303
2304Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2305 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2306 // tcbhead_t; use it instead of the usual global variable (see
2307 // sysdeps/{i386,x86_64}/nptl/tls.h)
2308 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2309 if (Subtarget.isTargetFuchsia()) {
2310 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2311 return SegmentOffset(IRB, 0x10, getAddressSpace());
2312 } else {
2313 // %fs:0x28, unless we're using a Kernel code model, in which case
2314 // it's %gs:0x28. gs:0x14 on i386.
2315 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2316 return SegmentOffset(IRB, Offset, getAddressSpace());
2317 }
2318 }
2319
2320 return TargetLowering::getIRStackGuard(IRB);
2321}
2322
2323void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2324 // MSVC CRT provides functionalities for stack protection.
2325 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2326 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2327 // MSVC CRT has a global variable holding security cookie.
2328 M.getOrInsertGlobal("__security_cookie",
2329 Type::getInt8PtrTy(M.getContext()));
2330
2331 // MSVC CRT has a function to validate security cookie.
2332 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2333 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2334 Type::getInt8PtrTy(M.getContext()));
2335 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2336 F->setCallingConv(CallingConv::X86_FastCall);
2337 F->addAttribute(1, Attribute::AttrKind::InReg);
2338 }
2339 return;
2340 }
2341 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2342 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2343 return;
2344 TargetLowering::insertSSPDeclarations(M);
2345}
2346
2347Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2348 // MSVC CRT has a global variable holding security cookie.
2349 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2350 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2351 return M.getGlobalVariable("__security_cookie");
2352 }
2353 return TargetLowering::getSDagStackGuard(M);
2354}
2355
2356Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2357 // MSVC CRT has a function to validate security cookie.
2358 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2359 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2360 return M.getFunction("__security_check_cookie");
2361 }
2362 return TargetLowering::getSSPStackGuardCheck(M);
2363}
2364
2365Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2366 if (Subtarget.getTargetTriple().isOSContiki())
2367 return getDefaultSafeStackPointerLocation(IRB, false);
2368
2369 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2370 // definition of TLS_SLOT_SAFESTACK in
2371 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2372 if (Subtarget.isTargetAndroid()) {
2373 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2374 // %gs:0x24 on i386
2375 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2376 return SegmentOffset(IRB, Offset, getAddressSpace());
2377 }
2378
2379 // Fuchsia is similar.
2380 if (Subtarget.isTargetFuchsia()) {
2381 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2382 return SegmentOffset(IRB, 0x18, getAddressSpace());
2383 }
2384
2385 return TargetLowering::getSafeStackPointerLocation(IRB);
2386}
2387
/// Reports an address space cast as a no-op when both \p SrcAS and \p DestAS
/// are below 256. The two address spaces must differ, which is asserted.
2388bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2389 unsigned DestAS) const {
2390 assert(SrcAS != DestAS && "Expected different address spaces!")((SrcAS != DestAS && "Expected different address spaces!"
) ? static_cast<void> (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2390, __PRETTY_FUNCTION__))
;
2391
2392 return SrcAS < 256 && DestAS < 256;
2393}
2394
2395//===----------------------------------------------------------------------===//
2396// Return Value Calling Convention Implementation
2397//===----------------------------------------------------------------------===//
2398
/// Returns the result of CCState::CheckReturn for \p Outs under the RetCC_X86
/// return convention. The CCState is built from \p CallConv, \p isVarArg,
/// \p MF and \p Context, with a local location list as scratch storage.
2399bool X86TargetLowering::CanLowerReturn(
2400 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2401 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2402 SmallVector<CCValAssign, 16> RVLocs;
2403 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2404 return CCInfo.CheckReturn(Outs, RetCC_X86);
2405}
2406
/// Returns a static, zero-terminated register list whose only entry is
/// X86::R11. The calling convention argument is ignored.
2407const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2408 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2409 return ScratchRegs;
2410}
2411
2412/// Lowers masks values (v*i1) to the local register values
2413/// \returns DAG node after lowering to register type
2414static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2415 const SDLoc &Dl, SelectionDAG &DAG) {
2416 EVT ValVT = ValArg.getValueType();
2417
2418 if (ValVT == MVT::v1i1)
2419 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2420 DAG.getIntPtrConstant(0, Dl));
2421
2422 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2423 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2424 // Two stage lowering might be required
2425 // bitcast: v8i1 -> i8 / v16i1 -> i16
2426 // anyextend: i8 -> i32 / i16 -> i32
2427 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2428 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2429 if (ValLoc == MVT::i32)
2430 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2431 return ValToCopy;
2432 }
2433
2434 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2435 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2436 // One stage lowering is required
2437 // bitcast: v32i1 -> i32 / v64i1 -> i64
2438 return DAG.getBitcast(ValLoc, ValArg);
2439 }
2440
2441 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2442}
2443
2444/// Breaks v64i1 value into two registers and adds the new node to the DAG
/// \param Arg        in/out: the 64-bit value to split (asserted to be i64);
///                   it is replaced by its bitcast to i64.
/// \param RegsToPass receives two (register, value) pairs: the low half
///                   (EXTRACT_ELEMENT 0) for \p VA's register followed by the
///                   high half (EXTRACT_ELEMENT 1) for \p NextVA's register.
/// \param VA         first location; asserted to be a register location.
/// \param NextVA     second location; asserted to be a register location.
/// \param Subtarget  asserted to have BWI and to be a 32-bit target.
2445static void Passv64i1ArgInRegs(
2446 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2447 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, CCValAssign &VA,
2448 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2449 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")((Subtarget.hasBWI() && "Expected AVX512BW target!") ?
static_cast<void> (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2449, __PRETTY_FUNCTION__))
;
2450 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2450, __PRETTY_FUNCTION__))
;
2451 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2451, __PRETTY_FUNCTION__))
;
2452 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2453, __PRETTY_FUNCTION__))
2453 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2453, __PRETTY_FUNCTION__))
;
2454
2455 // Before splitting the value we cast it to i64
2456 Arg = DAG.getBitcast(MVT::i64, Arg);
2457
2458 // Splitting the value into two i32 types
2459 SDValue Lo, Hi;
2460 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2461 DAG.getConstant(0, Dl, MVT::i32));
2462 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2463 DAG.getConstant(1, Dl, MVT::i32));
2464
2465 // Attach the two i32 types into corresponding registers
2466 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2467 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2468}
2469
/// Lowers an outgoing return into an X86ISD::RET_FLAG node, or an
/// X86ISD::IRET node for the X86_INTR calling convention (which must not
/// return a value).
///
/// The operand list of the resulting node is built up in RetOps:
///   - operand 0: the chain (patched with the final chain at the end),
///   - operand 1: the number of bytes to pop on return, as a target constant,
///   - one operand per returned value: the value itself for FP0/FP1 returns,
///     otherwise the register it was copied into,
///   - RAX/EAX when the function has an sret return register,
///   - any callee-saved registers that are preserved via copy,
///   - the glue value, if one was produced.
///
/// When the calling convention is X86_RegCall or the function carries the
/// "no_caller_saved_registers" attribute, every register used for a return
/// value is also removed from the callee-saved register list.
2470SDValue
2471X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2472 bool isVarArg,
2473 const SmallVectorImpl<ISD::OutputArg> &Outs,
2474 const SmallVectorImpl<SDValue> &OutVals,
2475 const SDLoc &dl, SelectionDAG &DAG) const {
2476 MachineFunction &MF = DAG.getMachineFunction();
2477 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2478
2479 // In some cases we need to disable registers from the default CSR list.
2480 // For example, when they are used for argument passing.
2481 bool ShouldDisableCalleeSavedRegister =
2482 CallConv == CallingConv::X86_RegCall ||
2483 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2484
2485 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2486 report_fatal_error("X86 interrupts may not return any value");
2487
2488 SmallVector<CCValAssign, 16> RVLocs;
2489 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2490 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2491
2492 SDValue Flag;
2493 SmallVector<SDValue, 6> RetOps;
2494 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2495 // Operand #1 = Bytes To Pop
2496 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2497 MVT::i32));
2498
2499 // Copy the result values into the output registers.
 // I walks the assigned locations and OutsIndex walks the returned values;
 // a custom (split v64i1) value consumes two locations but only one value.
2500 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2501 ++I, ++OutsIndex) {
2502 CCValAssign &VA = RVLocs[I];
2503 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2503, __PRETTY_FUNCTION__))
;
2504
2505 // Add the register to the CalleeSaveDisableRegs list.
2506 if (ShouldDisableCalleeSavedRegister)
2507 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2508
2509 SDValue ValToCopy = OutVals[OutsIndex];
2510 EVT ValVT = ValToCopy.getValueType();
2511
2512 // Promote values to the appropriate types.
2513 if (VA.getLocInfo() == CCValAssign::SExt)
2514 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2515 else if (VA.getLocInfo() == CCValAssign::ZExt)
2516 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2517 else if (VA.getLocInfo() == CCValAssign::AExt) {
2518 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2519 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2520 else
2521 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2522 }
2523 else if (VA.getLocInfo() == CCValAssign::BCvt)
2524 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2525
2526 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2527, __PRETTY_FUNCTION__))
2527 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2527, __PRETTY_FUNCTION__))
;
2528
2529 // If this is x86-64, and we disabled SSE, we can't return FP values,
2530 // or SSE or MMX vectors.
2531 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2532 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2533 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2534 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2535 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2536 } else if (ValVT == MVT::f64 &&
2537 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2538 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2539 // llvm-gcc has never done it right and no one has noticed, so this
2540 // should be OK for now.
2541 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2542 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2543 }
2544
2545 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2546 // the RET instruction and handled by the FP Stackifier.
2547 if (VA.getLocReg() == X86::FP0 ||
2548 VA.getLocReg() == X86::FP1) {
2549 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2550 // change the value to the FP stack register class.
2551 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2552 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2553 RetOps.push_back(ValToCopy);
2554 // Don't emit a copytoreg.
2555 continue;
2556 }
2557
2558 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2559 // which is returned in RAX / RDX.
2560 if (Subtarget.is64Bit()) {
2561 if (ValVT == MVT::x86mmx) {
2562 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2563 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2564 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2565 ValToCopy);
2566 // If we don't have SSE2 available, convert to v4f32 so the generated
2567 // register is legal.
2568 if (!Subtarget.hasSSE2())
2569 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2570 }
2571 }
2572 }
2573
 // (register, value) pairs to copy for this location: one entry normally,
 // two entries when a v64i1 value is split across two registers.
2574 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2575
2576 if (VA.needsCustom()) {
2577 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2578, __PRETTY_FUNCTION__))
2578 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2578, __PRETTY_FUNCTION__))
;
2579
 // ++I consumes the second location of the pair here, so the enclosing
 // loop does not visit it again.
2580 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RegsToPass, VA, RVLocs[++I],
2581 Subtarget);
2582
2583 assert(2 == RegsToPass.size() &&((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2584, __PRETTY_FUNCTION__))
2584 "Expecting two registers after Pass64BitArgInRegs")((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2584, __PRETTY_FUNCTION__))
;
2585
2586 // Add the second register to the CalleeSaveDisableRegs list.
2587 if (ShouldDisableCalleeSavedRegister)
2588 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2589 } else {
2590 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2591 }
2592
2593 // Add nodes to the DAG and add the values into the RetOps list
2594 for (auto &Reg : RegsToPass) {
2595 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2596 Flag = Chain.getValue(1);
2597 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2598 }
2599 }
2600
2601 // Swift calling convention does not require we copy the sret argument
2602 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2603
2604 // All x86 ABIs require that for returning structs by value we copy
2605 // the sret argument into %rax/%eax (depending on ABI) for the return.
2606 // We saved the argument into a virtual register in the entry block,
2607 // so now we copy the value out and into %rax/%eax.
2608 //
2609 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2610 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2611 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2612 // either case FuncInfo->setSRetReturnReg() will have been called.
2613 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2614 // When we have both sret and another return value, we should use the
2615 // original Chain stored in RetOps[0], instead of the current Chain updated
2616 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2617
2618 // For the case of sret and another return value, we have
2619 // Chain_0 at the function entry
2620 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2621 // If we use Chain_1 in getCopyFromReg, we will have
2622 // Val = getCopyFromReg(Chain_1)
2623 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2624
2625 // getCopyToReg(Chain_0) will be glued together with
2626 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2627 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2628 // Data dependency from Unit B to Unit A due to usage of Val in
2629 // getCopyToReg(Chain_1, Val)
2630 // Chain dependency from Unit A to Unit B
2631
2632 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2633 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2634 getPointerTy(MF.getDataLayout()));
2635
2636 unsigned RetValReg
2637 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2638 X86::RAX : X86::EAX;
2639 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2640 Flag = Chain.getValue(1);
2641
2642 // RAX/EAX now acts like a return value.
2643 RetOps.push_back(
2644 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2645
2646 // Add the returned register to the CalleeSaveDisableRegs list.
2647 if (ShouldDisableCalleeSavedRegister)
2648 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2649 }
2650
 // Append the callee-saved registers that are preserved via copy. The list
 // may be null and is otherwise zero-terminated; only GR64 registers are
 // expected in it.
2651 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2652 const MCPhysReg *I =
2653 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2654 if (I) {
2655 for (; *I; ++I) {
2656 if (X86::GR64RegClass.contains(*I))
2657 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2658 else
2659 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2659)
;
2660 }
2661 }
2662
2663 RetOps[0] = Chain; // Update chain.
2664
2665 // Add the flag if we have it.
2666 if (Flag.getNode())
2667 RetOps.push_back(Flag);
2668
2669 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2670 if (CallConv == CallingConv::X86_INTR)
2671 opcode = X86ISD::IRET;
2672 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2673}
2674
2675bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2676 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2677 return false;
2678
2679 SDValue TCChain = Chain;
2680 SDNode *Copy = *N->use_begin();
2681 if (Copy->getOpcode() == ISD::CopyToReg) {
2682 // If the copy has a glue operand, we conservatively assume it isn't safe to
2683 // perform a tail call.
2684 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2685 return false;
2686 TCChain = Copy->getOperand(0);
2687 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2688 return false;
2689
2690 bool HasRet = false;
2691 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2692 UI != UE; ++UI) {
2693 if (UI->getOpcode() != X86ISD::RET_FLAG)
2694 return false;
2695 // If we are returning more than one value, we can definitely
2696 // not make a tail call see PR19530
2697 if (UI->getNumOperands() > 4)
2698 return false;
2699 if (UI->getNumOperands() == 4 &&
2700 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2701 return false;
2702 HasRet = true;
2703 }
2704
2705 if (!HasRet)
2706 return false;
2707
2708 Chain = TCChain;
2709 return true;
2710}
2711
2712EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2713 ISD::NodeType ExtendKind) const {
2714 MVT ReturnMVT = MVT::i32;
2715
2716 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2717 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2718 // The ABI does not require i1, i8 or i16 to be extended.
2719 //
2720 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2721 // always extending i8/i16 return values, so keep doing that for now.
2722 // (PR26665).
2723 ReturnMVT = MVT::i8;
2724 }
2725
2726 EVT MinVT = getRegisterType(Context, ReturnMVT);
2727 return VT.bitsLT(MinVT) ? MinVT : VT;
2728}
2729
2730/// Reads two 32 bit registers and creates a 64 bit mask value.
2731/// \param VA The current 32 bit value that need to be assigned.
2732/// \param NextVA The next 32 bit value that need to be assigned.
2733/// \param Root The parent DAG node.
2734/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2735/// glue purposes. In the case the DAG is already using
2736/// physical register instead of virtual, we should glue
2737/// our new SDValue to InFlag SDvalue.
2738/// \return a new SDvalue of size 64bit.
2739static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2740 SDValue &Root, SelectionDAG &DAG,
2741 const SDLoc &Dl, const X86Subtarget &Subtarget,
2742 SDValue *InFlag = nullptr) {
2743 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2743, __PRETTY_FUNCTION__))
;
2744 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2744, __PRETTY_FUNCTION__))
;
2745 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2746, __PRETTY_FUNCTION__))
2746 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2746, __PRETTY_FUNCTION__))
;
2747 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2748, __PRETTY_FUNCTION__))
2748 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2748, __PRETTY_FUNCTION__))
;
2749 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2750, __PRETTY_FUNCTION__))
2750 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2750, __PRETTY_FUNCTION__))
;
2751
2752 SDValue Lo, Hi;
2753 SDValue ArgValueLo, ArgValueHi;
2754
2755 MachineFunction &MF = DAG.getMachineFunction();
2756 const TargetRegisterClass *RC = &X86::GR32RegClass;
2757
2758 // Read a 32 bit value from the registers.
2759 if (nullptr == InFlag) {
2760 // When no physical register is present,
2761 // create an intermediate virtual register.
2762 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2763 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2764 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2765 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2766 } else {
2767 // When a physical register is available read the value from it and glue
2768 // the reads together.
2769 ArgValueLo =
2770 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2771 *InFlag = ArgValueLo.getValue(2);
2772 ArgValueHi =
2773 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2774 *InFlag = ArgValueHi.getValue(2);
2775 }
2776
2777 // Convert the i32 type into v32i1 type.
2778 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2779
2780 // Convert the i32 type into v32i1 type.
2781 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2782
2783 // Concatenate the two values together.
2784 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2785}
2786
2787/// The function will lower a register of various sizes (8/16/32/64)
2788/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2789/// \returns a DAG node contains the operand after lowering to mask type.
2790static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2791 const EVT &ValLoc, const SDLoc &Dl,
2792 SelectionDAG &DAG) {
2793 SDValue ValReturned = ValArg;
2794
2795 if (ValVT == MVT::v1i1)
2796 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2797
2798 if (ValVT == MVT::v64i1) {
2799 // In 32 bit machine, this case is handled by getv64i1Argument
2800 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2800, __PRETTY_FUNCTION__))
;
2801 // In 64 bit machine, There is no need to truncate the value only bitcast
2802 } else {
2803 MVT maskLen;
2804 switch (ValVT.getSimpleVT().SimpleTy) {
2805 case MVT::v8i1:
2806 maskLen = MVT::i8;
2807 break;
2808 case MVT::v16i1:
2809 maskLen = MVT::i16;
2810 break;
2811 case MVT::v32i1:
2812 maskLen = MVT::i32;
2813 break;
2814 default:
2815 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2815)
;
2816 }
2817
2818 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2819 }
2820 return DAG.getBitcast(ValVT, ValReturned);
2821}
2822
2823/// Lower the result values of a call into the
2824/// appropriate copies out of appropriate physical registers.
2825///
2826SDValue X86TargetLowering::LowerCallResult(
2827 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2828 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2829 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2830 uint32_t *RegMask) const {
2831
2832 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2833 // Assign locations to each value returned by this call.
2834 SmallVector<CCValAssign, 16> RVLocs;
2835 bool Is64Bit = Subtarget.is64Bit();
2836 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2837 *DAG.getContext());
2838 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2839
2840 // Copy all of the result registers out of their specified physreg.
2841 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2842 ++I, ++InsIndex) {
2843 CCValAssign &VA = RVLocs[I];
2844 EVT CopyVT = VA.getLocVT();
2845
2846 // In some calling conventions we need to remove the used registers
2847 // from the register mask.
2848 if (RegMask) {
2849 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2850 SubRegs.isValid(); ++SubRegs)
2851 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2852 }
2853
2854 // If this is x86-64, and we disabled SSE, we can't return FP values
2855 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2856 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2857 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2858 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2859 } else if (CopyVT == MVT::f64 &&
2860 (Is64Bit && !Subtarget.hasSSE2())) {
2861 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2862 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2863 }
2864
2865 // If we prefer to use the value in xmm registers, copy it out as f80 and
2866 // use a truncate to move it from fp stack reg to xmm reg.
2867 bool RoundAfterCopy = false;
2868 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2869 isScalarFPTypeInSSEReg(VA.getValVT())) {
2870 if (!Subtarget.hasX87())
2871 report_fatal_error("X87 register return with X87 disabled");
2872 CopyVT = MVT::f80;
2873 RoundAfterCopy = (CopyVT != VA.getLocVT());
2874 }
2875
2876 SDValue Val;
2877 if (VA.needsCustom()) {
2878 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2879, __PRETTY_FUNCTION__))
2879 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2879, __PRETTY_FUNCTION__))
;
2880 Val =
2881 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2882 } else {
2883 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2884 .getValue(1);
2885 Val = Chain.getValue(0);
2886 InFlag = Chain.getValue(2);
2887 }
2888
2889 if (RoundAfterCopy)
2890 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2891 // This truncation won't change the value.
2892 DAG.getIntPtrConstant(1, dl));
2893
2894 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2895 if (VA.getValVT().isVector() &&
2896 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2897 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2898 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2899 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2900 } else
2901 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2902 }
2903
2904 InVals.push_back(Val);
2905 }
2906
2907 return Chain;
2908}
2909
2910//===----------------------------------------------------------------------===//
2911// C & StdCall & Fast Calling Convention implementation
2912//===----------------------------------------------------------------------===//
2913// The StdCall calling convention is used by many Windows API routines. It
2914// differs from the C calling convention only slightly: the callee cleans up
2915// the stack instead of the caller, and symbols are also decorated in a
2916// special way. It doesn't support any vector arguments.
2917// For info on fast calling convention see Fast Calling Convention (tail call)
2918// implementation LowerX86_32FastCCCallTo.
2919
2920/// CallIsStructReturn - Determines whether a call uses struct return
2921/// semantics.
2922enum StructReturnType {
2923 NotStructReturn,
2924 RegStructReturn,
2925 StackStructReturn
2926};
2927static StructReturnType
2928callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
2929 if (Outs.empty())
2930 return NotStructReturn;
2931
2932 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2933 if (!Flags.isSRet())
2934 return NotStructReturn;
2935 if (Flags.isInReg() || IsMCU)
2936 return RegStructReturn;
2937 return StackStructReturn;
2938}
2939
2940/// Determines whether a function uses struct return semantics.
2941static StructReturnType
2942argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
2943 if (Ins.empty())
2944 return NotStructReturn;
2945
2946 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2947 if (!Flags.isSRet())
2948 return NotStructReturn;
2949 if (Flags.isInReg() || IsMCU)
2950 return RegStructReturn;
2951 return StackStructReturn;
2952}
2953
2954/// Make a copy of an aggregate at address specified by "Src" to address
2955/// "Dst" with size and alignment information specified by the specific
2956/// parameter attribute. The copy will be passed as a byval function parameter.
2957static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2958 SDValue Chain, ISD::ArgFlagsTy Flags,
2959 SelectionDAG &DAG, const SDLoc &dl) {
2960 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2961
2962 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2963 /*isVolatile*/false, /*AlwaysInline=*/true,
2964 /*isTailCall*/false,
2965 MachinePointerInfo(), MachinePointerInfo());
2966}
2967
2968/// Return true if the calling convention is one that we can guarantee TCO for.
2969static bool canGuaranteeTCO(CallingConv::ID CC) {
2970 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2971 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2972 CC == CallingConv::HHVM || CC == CallingConv::Tail);
2973}
2974
2975/// Return true if we might ever do TCO for calls with this calling convention.
2976static bool mayTailCallThisCC(CallingConv::ID CC) {
2977 switch (CC) {
2978 // C calling conventions:
2979 case CallingConv::C:
2980 case CallingConv::Win64:
2981 case CallingConv::X86_64_SysV:
2982 // Callee pop conventions:
2983 case CallingConv::X86_ThisCall:
2984 case CallingConv::X86_StdCall:
2985 case CallingConv::X86_VectorCall:
2986 case CallingConv::X86_FastCall:
2987 // Swift:
2988 case CallingConv::Swift:
2989 return true;
2990 default:
2991 return canGuaranteeTCO(CC);
2992 }
2993}
2994
2995/// Return true if the function is being made into a tailcall target by
2996/// changing its ABI.
2997static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2998 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
2999}
3000
3001bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3002 auto Attr =
3003 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
3004 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
3005 return false;
3006
3007 ImmutableCallSite CS(CI);
3008 CallingConv::ID CalleeCC = CS.getCallingConv();
3009 if (!mayTailCallThisCC(CalleeCC))
3010 return false;
3011
3012 return true;
3013}
3014
3015SDValue
3016X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3017 const SmallVectorImpl<ISD::InputArg> &Ins,
3018 const SDLoc &dl, SelectionDAG &DAG,
3019 const CCValAssign &VA,
3020 MachineFrameInfo &MFI, unsigned i) const {
3021 // Create the nodes corresponding to a load from this parameter slot.
3022 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3023 bool AlwaysUseMutable = shouldGuaranteeTCO(
3024 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3025 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3026 EVT ValVT;
3027 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3028
3029 // If value is passed by pointer we have address passed instead of the value
3030 // itself. No need to extend if the mask value and location share the same
3031 // absolute size.
3032 bool ExtendedInMem =
3033 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3034 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3035
3036 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3037 ValVT = VA.getLocVT();
3038 else
3039 ValVT = VA.getValVT();
3040
3041 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3042 // changed with more analysis.
3043 // In case of tail call optimization mark all arguments mutable. Since they
3044 // could be overwritten by lowering of arguments in case of a tail call.
3045 if (Flags.isByVal()) {
3046 unsigned Bytes = Flags.getByValSize();
3047 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3048
3049 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3050 // can be improved with deeper analysis.
3051 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3052 /*isAliased=*/true);
3053 return DAG.getFrameIndex(FI, PtrVT);
3054 }
3055
3056 // This is an argument in memory. We might be able to perform copy elision.
3057 // If the argument is passed directly in memory without any extension, then we
3058 // can perform copy elision. Large vector types, for example, may be passed
3059 // indirectly by pointer.
3060 if (Flags.isCopyElisionCandidate() &&
3061 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
3062 EVT ArgVT = Ins[i].ArgVT;
3063 SDValue PartAddr;
3064 if (Ins[i].PartOffset == 0) {
3065 // If this is a one-part value or the first part of a multi-part value,
3066 // create a stack object for the entire argument value type and return a
3067 // load from our portion of it. This assumes that if the first part of an
3068 // argument is in memory, the rest will also be in memory.
3069 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3070 /*IsImmutable=*/false);
3071 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3072 return DAG.getLoad(
3073 ValVT, dl, Chain, PartAddr,
3074 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3075 } else {
3076 // This is not the first piece of an argument in memory. See if there is
3077 // already a fixed stack object including this offset. If so, assume it
3078 // was created by the PartOffset == 0 branch above and create a load from
3079 // the appropriate offset into it.
3080 int64_t PartBegin = VA.getLocMemOffset();
3081 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3082 int FI = MFI.getObjectIndexBegin();
3083 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3084 int64_t ObjBegin = MFI.getObjectOffset(FI);
3085 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3086 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3087 break;
3088 }
3089 if (MFI.isFixedObjectIndex(FI)) {
3090 SDValue Addr =
3091 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3092 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3093 return DAG.getLoad(
3094 ValVT, dl, Chain, Addr,
3095 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3096 Ins[i].PartOffset));
3097 }
3098 }
3099 }
3100
3101 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3102 VA.getLocMemOffset(), isImmutable);
3103
3104 // Set SExt or ZExt flag.
3105 if (VA.getLocInfo() == CCValAssign::ZExt) {
3106 MFI.setObjectZExt(FI, true);
3107 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3108 MFI.setObjectSExt(FI, true);
3109 }
3110
3111 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3112 SDValue Val = DAG.getLoad(
3113 ValVT, dl, Chain, FIN,
3114 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3115 return ExtendedInMem
3116 ? (VA.getValVT().isVector()
3117 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3118 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3119 : Val;
3120}
3121
3122// FIXME: Get this from tablegen.
3123static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3124 const X86Subtarget &Subtarget) {
3125 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3125, __PRETTY_FUNCTION__))
;
3126
3127 if (Subtarget.isCallingConvWin64(CallConv)) {
3128 static const MCPhysReg GPR64ArgRegsWin64[] = {
3129 X86::RCX, X86::RDX, X86::R8, X86::R9
3130 };
3131 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3132 }
3133
3134 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3135 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3136 };
3137 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3138}
3139
3140// FIXME: Get this from tablegen.
3141static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3142 CallingConv::ID CallConv,
3143 const X86Subtarget &Subtarget) {
3144 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3144, __PRETTY_FUNCTION__))
;
3145 if (Subtarget.isCallingConvWin64(CallConv)) {
3146 // The XMM registers which might contain var arg parameters are shadowed
3147 // in their paired GPR. So we only need to save the GPR to their home
3148 // slots.
3149 // TODO: __vectorcall will change this.
3150 return None;
3151 }
3152
3153 const Function &F = MF.getFunction();
3154 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3155 bool isSoftFloat = Subtarget.useSoftFloat();
3156 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3157, __PRETTY_FUNCTION__))
3157 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3157, __PRETTY_FUNCTION__))
;
3158 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3159 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3160 // registers.
3161 return None;
3162
3163 static const MCPhysReg XMMArgRegs64Bit[] = {
3164 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3165 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3166 };
3167 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3168}
3169
3170#ifndef NDEBUG
3171static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3172 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
3173 [](const CCValAssign &A, const CCValAssign &B) -> bool {
3174 return A.getValNo() < B.getValNo();
3175 });
3176}
3177#endif
3178
3179SDValue X86TargetLowering::LowerFormalArguments(
3180 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3181 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3182 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3183 MachineFunction &MF = DAG.getMachineFunction();
3184 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3185 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3186
3187 const Function &F = MF.getFunction();
3188 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3189 F.getName() == "main")
3190 FuncInfo->setForceFramePointer(true);
3191
3192 MachineFrameInfo &MFI = MF.getFrameInfo();
3193 bool Is64Bit = Subtarget.is64Bit();
3194 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3195
3196 assert(((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3198, __PRETTY_FUNCTION__))
3197 !(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3198, __PRETTY_FUNCTION__))
3198 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3198, __PRETTY_FUNCTION__))
;
3199
3200 // Assign locations to all of the incoming arguments.
3201 SmallVector<CCValAssign, 16> ArgLocs;
3202 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3203
3204 // Allocate shadow area for Win64.
3205 if (IsWin64)
3206 CCInfo.AllocateStack(32, 8);
3207
3208 CCInfo.AnalyzeArguments(Ins, CC_X86);
3209
3210 // In vectorcall calling convention a second pass is required for the HVA
3211 // types.
3212 if (CallingConv::X86_VectorCall == CallConv) {
3213 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3214 }
3215
3216 // The next loop assumes that the locations are in the same order of the
3217 // input arguments.
3218 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3219, __PRETTY_FUNCTION__))
3219 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3219, __PRETTY_FUNCTION__))
;
3220
3221 SDValue ArgValue;
3222 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3223 ++I, ++InsIndex) {
3224 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3224, __PRETTY_FUNCTION__))
;
3225 CCValAssign &VA = ArgLocs[I];
3226
3227 if (VA.isRegLoc()) {
3228 EVT RegVT = VA.getLocVT();
3229 if (VA.needsCustom()) {
3230 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3232, __PRETTY_FUNCTION__))
3231 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3232, __PRETTY_FUNCTION__))
3232 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3232, __PRETTY_FUNCTION__))
;
3233
3234 // v64i1 values, in regcall calling convention, that are
3235 // compiled to 32 bit arch, are split up into two registers.
3236 ArgValue =
3237 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3238 } else {
3239 const TargetRegisterClass *RC;
3240 if (RegVT == MVT::i8)
3241 RC = &X86::GR8RegClass;
3242 else if (RegVT == MVT::i16)
3243 RC = &X86::GR16RegClass;
3244 else if (RegVT == MVT::i32)
3245 RC = &X86::GR32RegClass;
3246 else if (Is64Bit && RegVT == MVT::i64)
3247 RC = &X86::GR64RegClass;
3248 else if (RegVT == MVT::f32)
3249 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3250 else if (RegVT == MVT::f64)
3251 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3252 else if (RegVT == MVT::f80)
3253 RC = &X86::RFP80RegClass;
3254 else if (RegVT == MVT::f128)
3255 RC = &X86::VR128RegClass;
3256 else if (RegVT.is512BitVector())
3257 RC = &X86::VR512RegClass;
3258 else if (RegVT.is256BitVector())
3259 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3260 else if (RegVT.is128BitVector())
3261 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3262 else if (RegVT == MVT::x86mmx)
3263 RC = &X86::VR64RegClass;
3264 else if (RegVT == MVT::v1i1)
3265 RC = &X86::VK1RegClass;
3266 else if (RegVT == MVT::v8i1)
3267 RC = &X86::VK8RegClass;
3268 else if (RegVT == MVT::v16i1)
3269 RC = &X86::VK16RegClass;
3270 else if (RegVT == MVT::v32i1)
3271 RC = &X86::VK32RegClass;
3272 else if (RegVT == MVT::v64i1)
3273 RC = &X86::VK64RegClass;
3274 else
3275 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3275)
;
3276
3277 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3278 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3279 }
3280
3281 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3282 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3283 // right size.
3284 if (VA.getLocInfo() == CCValAssign::SExt)
3285 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3286 DAG.getValueType(VA.getValVT()));
3287 else if (VA.getLocInfo() == CCValAssign::ZExt)
3288 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3289 DAG.getValueType(VA.getValVT()));
3290 else if (VA.getLocInfo() == CCValAssign::BCvt)
3291 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3292
3293 if (VA.isExtInLoc()) {
3294 // Handle MMX values passed in XMM regs.
3295 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3296 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3297 else if (VA.getValVT().isVector() &&
3298 VA.getValVT().getScalarType() == MVT::i1 &&
3299 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3300 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3301 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3302 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3303 } else
3304 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3305 }
3306 } else {
3307 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3307, __PRETTY_FUNCTION__))
;
3308 ArgValue =
3309 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3310 }
3311
3312 // If value is passed via pointer - do a load.
3313 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3314 ArgValue =
3315 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3316
3317 InVals.push_back(ArgValue);
3318 }
3319
3320 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3321 // Swift calling convention does not require we copy the sret argument
3322 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3323 if (CallConv == CallingConv::Swift)
3324 continue;
3325
3326 // All x86 ABIs require that for returning structs by value we copy the
3327 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3328 // the argument into a virtual register so that we can access it from the
3329 // return points.
3330 if (Ins[I].Flags.isSRet()) {
3331 unsigned Reg = FuncInfo->getSRetReturnReg();
3332 if (!Reg) {
3333 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3334 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3335 FuncInfo->setSRetReturnReg(Reg);
3336 }
3337 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3338 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3339 break;
3340 }
3341 }
3342
3343 unsigned StackSize = CCInfo.getNextStackOffset();
3344 // Align stack specially for tail calls.
3345 if (shouldGuaranteeTCO(CallConv,
3346 MF.getTarget().Options.GuaranteedTailCallOpt))
3347 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3348
3349 // If the function takes variable number of arguments, make a frame index for
3350 // the start of the first vararg value... for expansion of llvm.va_start. We
3351 // can skip this if there are no va_start calls.
3352 if (MFI.hasVAStart() &&
3353 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3354 CallConv != CallingConv::X86_ThisCall))) {
3355 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3356 }
3357
3358 // Figure out if XMM registers are in use.
3359 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3361, __PRETTY_FUNCTION__))
3360 F.hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3361, __PRETTY_FUNCTION__))
3361 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3361, __PRETTY_FUNCTION__))
;
3362
3363 // 64-bit calling conventions support varargs and register parameters, so we
3364 // have to do extra work to spill them in the prologue.
3365 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3366 // Find the first unallocated argument registers.
3367 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3368 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3369 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3370 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3371 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3372, __PRETTY_FUNCTION__))
3372 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3372, __PRETTY_FUNCTION__))
;
3373
3374 // Gather all the live in physical registers.
3375 SmallVector<SDValue, 6> LiveGPRs;
3376 SmallVector<SDValue, 8> LiveXMMRegs;
3377 SDValue ALVal;
3378 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3379 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3380 LiveGPRs.push_back(
3381 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3382 }
3383 if (!ArgXMMs.empty()) {
3384 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3385 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3386 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3387 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3388 LiveXMMRegs.push_back(
3389 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3390 }
3391 }
3392
3393 if (IsWin64) {
3394 // Get to the caller-allocated home save location. Add 8 to account
3395 // for the return address.
3396 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3397 FuncInfo->setRegSaveFrameIndex(
3398 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3399 // Fixup to set vararg frame on shadow area (4 x i64).
3400 if (NumIntRegs < 4)
3401 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3402 } else {
3403 // For X86-64, if there are vararg parameters that are passed via
3404 // registers, then we must store them to their spots on the stack so
3405 // they may be loaded by dereferencing the result of va_next.
3406 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3407 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3408 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3409 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3410 }
3411
3412 // Store the integer parameter registers.
3413 SmallVector<SDValue, 8> MemOps;
3414 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3415 getPointerTy(DAG.getDataLayout()));
3416 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3417 for (SDValue Val : LiveGPRs) {
3418 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3419 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3420 SDValue Store =
3421 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3422 MachinePointerInfo::getFixedStack(
3423 DAG.getMachineFunction(),
3424 FuncInfo->getRegSaveFrameIndex(), Offset));
3425 MemOps.push_back(Store);
3426 Offset += 8;
3427 }
3428
3429 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3430 // Now store the XMM (fp + vector) parameter registers.
3431 SmallVector<SDValue, 12> SaveXMMOps;
3432 SaveXMMOps.push_back(Chain);
3433 SaveXMMOps.push_back(ALVal);
3434 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3435 FuncInfo->getRegSaveFrameIndex(), dl));
3436 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3437 FuncInfo->getVarArgsFPOffset(), dl));
3438 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3439 LiveXMMRegs.end());
3440 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3441 MVT::Other, SaveXMMOps));
3442 }
3443
3444 if (!MemOps.empty())
3445 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3446 }
3447
3448 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3449 // Find the largest legal vector type.
3450 MVT VecVT = MVT::Other;
3451 // FIXME: Only some x86_32 calling conventions support AVX512.
3452 if (Subtarget.useAVX512Regs() &&
3453 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3454 CallConv == CallingConv::Intel_OCL_BI)))
3455 VecVT = MVT::v16f32;
3456 else if (Subtarget.hasAVX())
3457 VecVT = MVT::v8f32;
3458 else if (Subtarget.hasSSE2())
3459 VecVT = MVT::v4f32;
3460
3461 // We forward some GPRs and some vector types.
3462 SmallVector<MVT, 2> RegParmTypes;
3463 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3464 RegParmTypes.push_back(IntVT);
3465 if (VecVT != MVT::Other)
3466 RegParmTypes.push_back(VecVT);
3467
3468 // Compute the set of forwarded registers. The rest are scratch.
3469 SmallVectorImpl<ForwardedRegister> &Forwards =
3470 FuncInfo->getForwardedMustTailRegParms();
3471 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3472
3473 // Conservatively forward AL on x86_64, since it might be used for varargs.
3474 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3475 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3476 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3477 }
3478
3479 // Copy all forwards from physical to virtual registers.
3480 for (ForwardedRegister &FR : Forwards) {
3481 // FIXME: Can we use a less constrained schedule?
3482 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, FR.VReg, FR.VT);
3483 FR.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(FR.VT));
3484 Chain = DAG.getCopyToReg(Chain, dl, FR.VReg, RegVal);
3485 }
3486 }
3487
3488 // Some CCs need callee pop.
3489 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3490 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3491 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3492 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3493 // X86 interrupts must pop the error code (and the alignment padding) if
3494 // present.
3495 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3496 } else {
3497 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3498 // If this is an sret function, the return should pop the hidden pointer.
3499 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3500 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3501 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3502 FuncInfo->setBytesToPopOnReturn(4);
3503 }
3504
3505 if (!Is64Bit) {
3506 // RegSaveFrameIndex is X86-64 only.
3507 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3508 if (CallConv == CallingConv::X86_FastCall ||
3509 CallConv == CallingConv::X86_ThisCall)
3510 // fastcc functions can't have varargs.
3511 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3512 }
3513
3514 FuncInfo->setArgumentStackSize(StackSize);
3515
3516 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3517 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3518 if (Personality == EHPersonality::CoreCLR) {
3519 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3519, __PRETTY_FUNCTION__))
;
3520 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3521 // that we'd prefer this slot be allocated towards the bottom of the frame
3522 // (i.e. near the stack pointer after allocating the frame). Every
3523 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3524 // offset from the bottom of this and each funclet's frame must be the
3525 // same, so the size of funclets' (mostly empty) frames is dictated by
3526 // how far this slot is from the bottom (since they allocate just enough
3527 // space to accommodate holding this slot at the correct offset).
3528 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3529 EHInfo->PSPSymFrameIdx = PSPSymFI;
3530 }
3531 }
3532
3533 if (CallConv == CallingConv::X86_RegCall ||
3534 F.hasFnAttribute("no_caller_saved_registers")) {
3535 MachineRegisterInfo &MRI = MF.getRegInfo();
3536 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3537 MRI.disableCalleeSavedRegister(Pair.first);
3538 }
3539
3540 return Chain;
3541}
3542
3543SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3544 SDValue Arg, const SDLoc &dl,
3545 SelectionDAG &DAG,
3546 const CCValAssign &VA,
3547 ISD::ArgFlagsTy Flags) const {
3548 unsigned LocMemOffset = VA.getLocMemOffset();
3549 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3550 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3551 StackPtr, PtrOff);
3552 if (Flags.isByVal())
3553 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3554
3555 return DAG.getStore(
3556 Chain, dl, Arg, PtrOff,
3557 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3558}
3559
3560/// Emit a load of return address if tail call
3561/// optimization is performed and it is required.
3562SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3563 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3564 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3565 // Adjust the Return address stack slot.
3566 EVT VT = getPointerTy(DAG.getDataLayout());
3567 OutRetAddr = getReturnAddressFrameIndex(DAG);
3568
3569 // Load the "old" Return address.
3570 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3571 return SDValue(OutRetAddr.getNode(), 1);
3572}
3573
3574/// Emit a store of the return address if tail call
3575/// optimization is performed and it is required (FPDiff!=0).
3576static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3577 SDValue Chain, SDValue RetAddrFrIdx,
3578 EVT PtrVT, unsigned SlotSize,
3579 int FPDiff, const SDLoc &dl) {
3580 // Store the return address to the appropriate stack slot.
3581 if (!FPDiff) return Chain;
3582 // Calculate the new stack slot for the return address.
3583 int NewReturnAddrFI =
3584 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3585 false);
3586 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3587 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3588 MachinePointerInfo::getFixedStack(
3589 DAG.getMachineFunction(), NewReturnAddrFI));
3590 return Chain;
3591}
3592
3593/// Returns a vector_shuffle mask for an movs{s|d}, movd
3594/// operation of specified width.
3595static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3596 SDValue V2) {
3597 unsigned NumElems = VT.getVectorNumElements();
3598 SmallVector<int, 8> Mask;
3599 Mask.push_back(NumElems);
3600 for (unsigned i = 1; i != NumElems; ++i)
3601 Mask.push_back(i);
3602 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3603}
3604
3605SDValue
3606X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3607 SmallVectorImpl<SDValue> &InVals) const {
3608 SelectionDAG &DAG = CLI.DAG;
3609 SDLoc &dl = CLI.DL;
3610 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3611 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3612 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3613 SDValue Chain = CLI.Chain;
3614 SDValue Callee = CLI.Callee;
3615 CallingConv::ID CallConv = CLI.CallConv;
3616 bool &isTailCall = CLI.IsTailCall;
3617 bool isVarArg = CLI.IsVarArg;
3618
3619 MachineFunction &MF = DAG.getMachineFunction();
3620 bool Is64Bit = Subtarget.is64Bit();
3621 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3622 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3623 bool IsSibcall = false;
3624 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
3625 CallConv == CallingConv::Tail;
3626 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3627 auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
3628 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3629 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3630 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3631 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3632 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
3633 bool HasNoCfCheck =
3634 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3635 const Module *M = MF.getMMI().getModule();
3636 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3637
3638 MachineFunction::CallSiteInfo CSInfo;
3639
3640 if (CallConv == CallingConv::X86_INTR)
3641 report_fatal_error("X86 interrupts may not be called directly");
3642
3643 if (Attr.getValueAsString() == "true")
3644 isTailCall = false;
3645
3646 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
3647 // If we are using a GOT, disable tail calls to external symbols with
3648 // default visibility. Tail calling such a symbol requires using a GOT
3649 // relocation, which forces early binding of the symbol. This breaks code
3650 // that require lazy function symbol resolution. Using musttail or
3651 // GuaranteedTailCallOpt will override this.
3652 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3653 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3654 G->getGlobal()->hasDefaultVisibility()))
3655 isTailCall = false;
3656 }
3657
3658 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3659 if (IsMustTail) {
3660 // Force this to be a tail call. The verifier rules are enough to ensure
3661 // that we can lower this successfully without moving the return address
3662 // around.
3663 isTailCall = true;
3664 } else if (isTailCall) {
3665 // Check if it's really possible to do a tail call.
3666 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3667 isVarArg, SR != NotStructReturn,
3668 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3669 Outs, OutVals, Ins, DAG);
3670
3671 // Sibcalls are automatically detected tailcalls which do not require
3672 // ABI changes.
3673 if (!IsGuaranteeTCO && isTailCall)
3674 IsSibcall = true;
3675
3676 if (isTailCall)
3677 ++NumTailCalls;
3678 }
3679
3680 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3681, __PRETTY_FUNCTION__))
3681 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3681, __PRETTY_FUNCTION__))
;
3682
3683 // Analyze operands of the call, assigning locations to each operand.
3684 SmallVector<CCValAssign, 16> ArgLocs;
3685 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3686
3687 // Allocate shadow area for Win64.
3688 if (IsWin64)
3689 CCInfo.AllocateStack(32, 8);
3690
3691 CCInfo.AnalyzeArguments(Outs, CC_X86);
3692
3693 // In vectorcall calling convention a second pass is required for the HVA
3694 // types.
3695 if (CallingConv::X86_VectorCall == CallConv) {
3696 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3697 }
3698
3699 // Get a count of how many bytes are to be pushed on the stack.
3700 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3701 if (IsSibcall)
3702 // This is a sibcall. The memory operands are available in caller's
3703 // own caller's stack.
3704 NumBytes = 0;
3705 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
3706 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3707
3708 int FPDiff = 0;
3709 if (isTailCall && !IsSibcall && !IsMustTail) {
3710 // Lower arguments at fp - stackoffset + fpdiff.
3711 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3712
3713 FPDiff = NumBytesCallerPushed - NumBytes;
3714
3715 // Set the delta of movement of the returnaddr stackslot.
3716 // But only set if delta is greater than previous delta.
3717 if (FPDiff < X86Info->getTCReturnAddrDelta())
3718 X86Info->setTCReturnAddrDelta(FPDiff);
3719 }
3720
3721 unsigned NumBytesToPush = NumBytes;
3722 unsigned NumBytesToPop = NumBytes;
3723
3724 // If we have an inalloca argument, all stack space has already been allocated
3725 // for us and be right at the top of the stack. We don't support multiple
3726 // arguments passed in memory when using inalloca.
3727 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3728 NumBytesToPush = 0;
3729 if (!ArgLocs.back().isMemLoc())
3730 report_fatal_error("cannot use inalloca attribute on a register "
3731 "parameter");
3732 if (ArgLocs.back().getLocMemOffset() != 0)
3733 report_fatal_error("any parameter with the inalloca attribute must be "
3734 "the only memory argument");
3735 }
3736
3737 if (!IsSibcall)
3738 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3739 NumBytes - NumBytesToPush, dl);
3740
3741 SDValue RetAddrFrIdx;
3742 // Load return address for tail calls.
3743 if (isTailCall && FPDiff)
3744 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3745 Is64Bit, FPDiff, dl);
3746
3747 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3748 SmallVector<SDValue, 8> MemOpChains;
3749 SDValue StackPtr;
3750
3751 // The next loop assumes that the locations are in the same order of the
3752 // input arguments.
3753 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3754, __PRETTY_FUNCTION__))
3754 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3754, __PRETTY_FUNCTION__))
;
3755
3756 // Walk the register/memloc assignments, inserting copies/loads. In the case
3757 // of tail call optimization arguments are handled later.
3758 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3759 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3760 ++I, ++OutIndex) {
3761 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3761, __PRETTY_FUNCTION__))
;
3762 // Skip inalloca arguments, they have already been written.
3763 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3764 if (Flags.isInAlloca())
3765 continue;
3766
3767 CCValAssign &VA = ArgLocs[I];
3768 EVT RegVT = VA.getLocVT();
3769 SDValue Arg = OutVals[OutIndex];
3770 bool isByVal = Flags.isByVal();
3771
3772 // Promote the value if needed.
3773 switch (VA.getLocInfo()) {
3774 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3774)
;
3775 case CCValAssign::Full: break;
3776 case CCValAssign::SExt:
3777 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3778 break;
3779 case CCValAssign::ZExt:
3780 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3781 break;
3782 case CCValAssign::AExt:
3783 if (Arg.getValueType().isVector() &&
3784 Arg.getValueType().getVectorElementType() == MVT::i1)
3785 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3786 else if (RegVT.is128BitVector()) {
3787 // Special case: passing MMX values in XMM registers.
3788 Arg = DAG.getBitcast(MVT::i64, Arg);
3789 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3790 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3791 } else
3792 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3793 break;
3794 case CCValAssign::BCvt:
3795 Arg = DAG.getBitcast(RegVT, Arg);
3796 break;
3797 case CCValAssign::Indirect: {
3798 if (isByVal) {
3799 // Memcpy the argument to a temporary stack slot to prevent
3800 // the caller from seeing any modifications the callee may make
3801 // as guaranteed by the `byval` attribute.
3802 int FrameIdx = MF.getFrameInfo().CreateStackObject(
3803 Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
3804 false);
3805 SDValue StackSlot =
3806 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
3807 Chain =
3808 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
3809 // From now on treat this as a regular pointer
3810 Arg = StackSlot;
3811 isByVal = false;
3812 } else {
3813 // Store the argument.
3814 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3815 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3816 Chain = DAG.getStore(
3817 Chain, dl, Arg, SpillSlot,
3818 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3819 Arg = SpillSlot;
3820 }
3821 break;
3822 }
3823 }
3824
3825 if (VA.needsCustom()) {
3826 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3827, __PRETTY_FUNCTION__))
3827 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3827, __PRETTY_FUNCTION__))
;
3828 // Split v64i1 value into two registers
3829 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
3830 } else if (VA.isRegLoc()) {
3831 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3832 const TargetOptions &Options = DAG.getTarget().Options;
3833 if (Options.EnableDebugEntryValues)
3834 CSInfo.emplace_back(VA.getLocReg(), I);
3835 if (isVarArg && IsWin64) {
3836 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3837 // shadow reg if callee is a varargs function.
3838 unsigned ShadowReg = 0;
3839 switch (VA.getLocReg()) {
3840 case X86::XMM0: ShadowReg = X86::RCX; break;
3841 case X86::XMM1: ShadowReg = X86::RDX; break;
3842 case X86::XMM2: ShadowReg = X86::R8; break;
3843 case X86::XMM3: ShadowReg = X86::R9; break;
3844 }
3845 if (ShadowReg)
3846 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3847 }
3848 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3849 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3849, __PRETTY_FUNCTION__))
;
3850 if (!StackPtr.getNode())
3851 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3852 getPointerTy(DAG.getDataLayout()));
3853 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3854 dl, DAG, VA, Flags));
3855 }
3856 }
3857
3858 if (!MemOpChains.empty())
3859 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3860
3861 if (Subtarget.isPICStyleGOT()) {
3862 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3863 // GOT pointer.
3864 if (!isTailCall) {
3865 RegsToPass.push_back(std::make_pair(
3866 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3867 getPointerTy(DAG.getDataLayout()))));
3868 } else {
3869 // If we are tail calling and generating PIC/GOT style code load the
3870 // address of the callee into ECX. The value in ecx is used as target of
3871 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3872 // for tail calls on PIC/GOT architectures. Normally we would just put the
3873 // address of GOT into ebx and then call target@PLT. But for tail calls
3874 // ebx would be restored (since ebx is callee saved) before jumping to the
3875 // target@PLT.
3876
3877 // Note: The actual moving to ECX is done further down.
3878 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3879 if (G && !G->getGlobal()->hasLocalLinkage() &&
3880 G->getGlobal()->hasDefaultVisibility())
3881 Callee = LowerGlobalAddress(Callee, DAG);
3882 else if (isa<ExternalSymbolSDNode>(Callee))
3883 Callee = LowerExternalSymbol(Callee, DAG);
3884 }
3885 }
3886
3887 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3888 // From AMD64 ABI document:
3889 // For calls that may call functions that use varargs or stdargs
3890 // (prototype-less calls or calls to functions containing ellipsis (...) in
3891 // the declaration) %al is used as hidden argument to specify the number
3892 // of SSE registers used. The contents of %al do not need to match exactly
3893 // the number of registers, but must be an upper bound on the number of SSE
3894 // registers used and is in the range 0 - 8 inclusive.
3895
3896 // Count the number of XMM registers allocated.
3897 static const MCPhysReg XMMArgRegs[] = {
3898 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3899 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3900 };
3901 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3902 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3903, __PRETTY_FUNCTION__))
3903 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3903, __PRETTY_FUNCTION__))
;
3904
3905 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3906 DAG.getConstant(NumXMMRegs, dl,
3907 MVT::i8)));
3908 }
3909
3910 if (isVarArg && IsMustTail) {
3911 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3912 for (const auto &F : Forwards) {
3913 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3914 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3915 }
3916 }
3917
3918 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3919 // don't need this because the eligibility check rejects calls that require
3920 // shuffling arguments passed in memory.
3921 if (!IsSibcall && isTailCall) {
3922 // Force all the incoming stack arguments to be loaded from the stack
3923 // before any new outgoing arguments are stored to the stack, because the
3924 // outgoing stack slots may alias the incoming argument stack slots, and
3925 // the alias isn't otherwise explicit. This is slightly more conservative
3926 // than necessary, because it means that each store effectively depends
3927 // on every argument instead of just those arguments it would clobber.
3928 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3929
3930 SmallVector<SDValue, 8> MemOpChains2;
3931 SDValue FIN;
3932 int FI = 0;
3933 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3934 ++I, ++OutsIndex) {
3935 CCValAssign &VA = ArgLocs[I];
3936
3937 if (VA.isRegLoc()) {
3938 if (VA.needsCustom()) {
3939 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3940, __PRETTY_FUNCTION__))
3940 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3940, __PRETTY_FUNCTION__))
;
3941 // This means that we are in special case where one argument was
3942 // passed through two register locations - Skip the next location
3943 ++I;
3944 }
3945
3946 continue;
3947 }
3948
3949 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3949, __PRETTY_FUNCTION__))
;
3950 SDValue Arg = OutVals[OutsIndex];
3951 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3952 // Skip inalloca arguments. They don't require any work.
3953 if (Flags.isInAlloca())
3954 continue;
3955 // Create frame index.
3956 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3957 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3958 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3959 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3960
3961 if (Flags.isByVal()) {
3962 // Copy relative to framepointer.
3963 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3964 if (!StackPtr.getNode())
3965 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3966 getPointerTy(DAG.getDataLayout()));
3967 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3968 StackPtr, Source);
3969
3970 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3971 ArgChain,
3972 Flags, DAG, dl));
3973 } else {
3974 // Store relative to framepointer.
3975 MemOpChains2.push_back(DAG.getStore(
3976 ArgChain, dl, Arg, FIN,
3977 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3978 }
3979 }
3980
3981 if (!MemOpChains2.empty())
3982 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3983
3984 // Store the return address to the appropriate stack slot.
3985 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3986 getPointerTy(DAG.getDataLayout()),
3987 RegInfo->getSlotSize(), FPDiff, dl);
3988 }
3989
3990 // Build a sequence of copy-to-reg nodes chained together with token chain
3991 // and flag operands which copy the outgoing args into registers.
3992 SDValue InFlag;
3993 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3994 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3995 RegsToPass[i].second, InFlag);
3996 InFlag = Chain.getValue(1);
3997 }
3998
3999 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4000 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4000, __PRETTY_FUNCTION__))
;
4001 // In the 64-bit large code model, we have to make all calls
4002 // through a register, since the call instruction's 32-bit
4003 // pc-relative offset may not be large enough to hold the whole
4004 // address.
4005 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4006 Callee->getOpcode() == ISD::ExternalSymbol) {
4007 // Lower direct calls to global addresses and external symbols. Setting
4008 // ForCall to true here has the effect of removing WrapperRIP when possible
4009 // to allow direct calls to be selected without first materializing the
4010 // address into a register.
4011 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4012 } else if (Subtarget.isTarget64BitILP32() &&
4013 Callee->getValueType(0) == MVT::i32) {
4014 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4015 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4016 }
4017
4018 // Returns a chain & a flag for retval copy to use.
4019 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4020 SmallVector<SDValue, 8> Ops;
4021
4022 if (!IsSibcall && isTailCall) {
4023 Chain = DAG.getCALLSEQ_END(Chain,
4024 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4025 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4026 InFlag = Chain.getValue(1);
4027 }
4028
4029 Ops.push_back(Chain);
4030 Ops.push_back(Callee);
4031
4032 if (isTailCall)
4033 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
4034
4035 // Add argument registers to the end of the list so that they are known live
4036 // into the call.
4037 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4038 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4039 RegsToPass[i].second.getValueType()));
4040
4041 // Add a register mask operand representing the call-preserved registers.
4042 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
4043 // set X86_INTR calling convention because it has the same CSR mask
4044 // (same preserved registers).
4045 const uint32_t *Mask = RegInfo->getCallPreservedMask(
4046 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
4047 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4047, __PRETTY_FUNCTION__))
;
4048
4049 // If this is an invoke in a 32-bit function using a funclet-based
4050 // personality, assume the function clobbers all registers. If an exception
4051 // is thrown, the runtime will not restore CSRs.
4052 // FIXME: Model this more precisely so that we can register allocate across
4053 // the normal edge and spill and fill across the exceptional edge.
4054 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
4055 const Function &CallerFn = MF.getFunction();
4056 EHPersonality Pers =
4057 CallerFn.hasPersonalityFn()
4058 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4059 : EHPersonality::Unknown;
4060 if (isFuncletEHPersonality(Pers))
4061 Mask = RegInfo->getNoPreservedMask();
4062 }
4063
4064 // Define a new register mask from the existing mask.
4065 uint32_t *RegMask = nullptr;
4066
4067 // In some calling conventions we need to remove the used physical registers
4068 // from the reg mask.
4069 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4070 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4071
4072 // Allocate a new Reg Mask and copy Mask.
4073 RegMask = MF.allocateRegMask();
4074 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4075 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4076
4077 // Make sure all sub registers of the argument registers are reset
4078 // in the RegMask.
4079 for (auto const &RegPair : RegsToPass)
4080 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4081 SubRegs.isValid(); ++SubRegs)
4082 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4083
4084 // Create the RegMask Operand according to our updated mask.
4085 Ops.push_back(DAG.getRegisterMask(RegMask));
4086 } else {
4087 // Create the RegMask Operand according to the static mask.
4088 Ops.push_back(DAG.getRegisterMask(Mask));
4089 }
4090
4091 if (InFlag.getNode())
4092 Ops.push_back(InFlag);
4093
4094 if (isTailCall) {
4095 // We used to do:
4096 //// If this is the first return lowered for this function, add the regs
4097 //// to the liveout set for the function.
4098 // This isn't right, although it's probably harmless on x86; liveouts
4099 // should be computed from returns not tail calls. Consider a void
4100 // function making a tail call to a function returning int.
4101 MF.getFrameInfo().setHasTailCall();
4102 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4103 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4104 return Ret;
4105 }
4106
4107 if (HasNoCfCheck && IsCFProtectionSupported) {
4108 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4109 } else {
4110 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4111 }
4112 InFlag = Chain.getValue(1);
4113 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4114
4115 // Save heapallocsite metadata.
4116 if (CLI.CS)
4117 if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite"))
4118 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4119
4120 // Create the CALLSEQ_END node.
4121 unsigned NumBytesForCalleeToPop;
4122 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4123 DAG.getTarget().Options.GuaranteedTailCallOpt))
4124 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4125 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4126 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4127 SR == StackStructReturn)
4128 // If this is a call to a struct-return function, the callee
4129 // pops the hidden struct pointer, so we have to push it back.
4130 // This is common for Darwin/X86, Linux & Mingw32 targets.
4131 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4132 NumBytesForCalleeToPop = 4;
4133 else
4134 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4135
4136 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
4137 // No need to reset the stack after the call if the call doesn't return. To
4138 // make the MI verify, we'll pretend the callee does it for us.
4139 NumBytesForCalleeToPop = NumBytes;
4140 }
4141
4142 // Returns a flag for retval copy to use.
4143 if (!IsSibcall) {
4144 Chain = DAG.getCALLSEQ_END(Chain,
4145 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4146 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4147 true),
4148 InFlag, dl);
4149 InFlag = Chain.getValue(1);
4150 }
4151
4152 // Handle result values, copying them out of physregs into vregs that we
4153 // return.
4154 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4155 InVals, RegMask);
4156}
4157
4158//===----------------------------------------------------------------------===//
4159// Fast Calling Convention (tail call) implementation
4160//===----------------------------------------------------------------------===//
4161
4162// Like stdcall, the callee cleans up the arguments, except that ECX is
4163// reserved for storing the tail called function address. Only 2 registers are
4164// free for argument passing (inreg). Tail call optimization is performed
4165// provided:
4166// * tailcallopt is enabled
4167// * caller/callee are fastcc
4168// On X86_64 architecture with GOT-style position independent code only local
4169// (within module) calls are supported at the moment.
4170// To keep the stack aligned according to platform abi the function
4171// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4172// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
4173// If a tail called function callee has more arguments than the caller the
4174// caller needs to make sure that there is room to move the RETADDR to. This is
4175// achieved by reserving an area the size of the argument delta right after the
4176// original RETADDR, but before the saved framepointer or the spilled registers
4177// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4178// stack layout:
4179// arg1
4180// arg2
4181// RETADDR
4182// [ new RETADDR
4183// move area ]
4184// (possible EBP)
4185// ESI
4186// EDI
4187// local1 ..
4188
4189/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4190/// requirement.
4191unsigned
4192X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4193 SelectionDAG &DAG) const {
4194 const Align StackAlignment(Subtarget.getFrameLowering()->getStackAlignment());
4195 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4196 assert(StackSize % SlotSize == 0 &&((StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize"
) ? static_cast<void> (0) : __assert_fail ("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4197, __PRETTY_FUNCTION__))
4197 "StackSize must be a multiple of SlotSize")((StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize"
) ? static_cast<void> (0) : __assert_fail ("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4197, __PRETTY_FUNCTION__))
;
4198 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4199}
4200
4201/// Return true if the given stack call argument is already available in the
4202/// same position (relatively) of the caller's incoming argument stack.
4203static
4204bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4205 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4206 const X86InstrInfo *TII, const CCValAssign &VA) {
4207 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4208
4209 for (;;) {
4210 // Look through nodes that don't alter the bits of the incoming value.
4211 unsigned Op = Arg.getOpcode();
4212 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4213 Arg = Arg.getOperand(0);
4214 continue;
4215 }
4216 if (Op == ISD::TRUNCATE) {
4217 const SDValue &TruncInput = Arg.getOperand(0);
4218 if (TruncInput.getOpcode() == ISD::AssertZext &&
4219 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4220 Arg.getValueType()) {
4221 Arg = TruncInput.getOperand(0);
4222 continue;
4223 }
4224 }
4225 break;
4226 }
4227
4228 int FI = INT_MAX2147483647;
4229 if (Arg.getOpcode() == ISD::CopyFromReg) {
4230 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4231 if (!Register::isVirtualRegister(VR))
4232 return false;
4233 MachineInstr *Def = MRI->getVRegDef(VR);
4234 if (!Def)
4235 return false;
4236 if (!Flags.isByVal()) {
4237 if (!TII->isLoadFromStackSlot(*Def, FI))
4238 return false;
4239 } else {
4240 unsigned Opcode = Def->getOpcode();
4241 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4242 Opcode == X86::LEA64_32r) &&
4243 Def->getOperand(1).isFI()) {
4244 FI = Def->getOperand(1).getIndex();
4245 Bytes = Flags.getByValSize();
4246 } else
4247 return false;
4248 }
4249 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4250 if (Flags.isByVal())
4251 // ByVal argument is passed in as a pointer but it's now being
4252 // dereferenced. e.g.
4253 // define @foo(%struct.X* %A) {
4254 // tail call @bar(%struct.X* byval %A)
4255 // }
4256 return false;
4257 SDValue Ptr = Ld->getBasePtr();
4258 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4259 if (!FINode)
4260 return false;
4261 FI = FINode->getIndex();
4262 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4263 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4264 FI = FINode->getIndex();
4265 Bytes = Flags.getByValSize();
4266 } else
4267 return false;
4268
4269 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4269, __PRETTY_FUNCTION__))
;
4270 if (!MFI.isFixedObjectIndex(FI))
4271 return false;
4272
4273 if (Offset != MFI.getObjectOffset(FI))
4274 return false;
4275
4276 // If this is not byval, check that the argument stack object is immutable.
4277 // inalloca and argument copy elision can create mutable argument stack
4278 // objects. Byval objects can be mutated, but a byval call intends to pass the
4279 // mutated memory.
4280 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4281 return false;
4282
4283 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4284 // If the argument location is wider than the argument type, check that any
4285 // extension flags match.
4286 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4287 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4288 return false;
4289 }
4290 }
4291
4292 return Bytes == MFI.getObjectSize(FI);
4293}
4294
4295/// Check whether the call is eligible for tail call optimization. Targets
4296/// that want to do tail call optimization should implement this function.
4297bool X86TargetLowering::IsEligibleForTailCallOptimization(
4298 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4299 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4300 const SmallVectorImpl<ISD::OutputArg> &Outs,
4301 const SmallVectorImpl<SDValue> &OutVals,
4302 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4303 if (!mayTailCallThisCC(CalleeCC))
4304 return false;
4305
4306 // If -tailcallopt is specified, make fastcc functions tail-callable.
4307 MachineFunction &MF = DAG.getMachineFunction();
4308 const Function &CallerF = MF.getFunction();
4309
4310 // If the function return type is x86_fp80 and the callee return type is not,
4311 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4312 // perform a tailcall optimization here.
4313 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4314 return false;
4315
4316 CallingConv::ID CallerCC = CallerF.getCallingConv();
4317 bool CCMatch = CallerCC == CalleeCC;
4318 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4319 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4320 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4321 CalleeCC == CallingConv::Tail;
4322
4323 // Win64 functions have extra shadow space for argument homing. Don't do the
4324 // sibcall if the caller and callee have mismatched expectations for this
4325 // space.
4326 if (IsCalleeWin64 != IsCallerWin64)
4327 return false;
4328
4329 if (IsGuaranteeTCO) {
4330 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4331 return true;
4332 return false;
4333 }
4334
4335 // Look for obvious safe cases to perform tail call optimization that do not
4336 // require ABI changes. This is what gcc calls sibcall.
4337
4338 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4339 // emit a special epilogue.
4340 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4341 if (RegInfo->needsStackRealignment(MF))
4342 return false;
4343
4344 // Also avoid sibcall optimization if either caller or callee uses struct
4345 // return semantics.
4346 if (isCalleeStructRet || isCallerStructRet)
4347 return false;
4348
4349 // Do not sibcall optimize vararg calls unless all arguments are passed via
4350 // registers.
4351 LLVMContext &C = *DAG.getContext();
4352 if (isVarArg && !Outs.empty()) {
4353 // Optimizing for varargs on Win64 is unlikely to be safe without
4354 // additional testing.
4355 if (IsCalleeWin64 || IsCallerWin64)
4356 return false;
4357
4358 SmallVector<CCValAssign, 16> ArgLocs;
4359 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4360
4361 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4362 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4363 if (!ArgLocs[i].isRegLoc())
4364 return false;
4365 }
4366
4367 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4368 // stack. Therefore, if it's not used by the call it is not safe to optimize
4369 // this into a sibcall.
4370 bool Unused = false;
4371 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4372 if (!Ins[i].Used) {
4373 Unused = true;
4374 break;
4375 }
4376 }
4377 if (Unused) {
4378 SmallVector<CCValAssign, 16> RVLocs;
4379 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4380 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4381 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4382 CCValAssign &VA = RVLocs[i];
4383 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4384 return false;
4385 }
4386 }
4387
4388 // Check that the call results are passed in the same way.
4389 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4390 RetCC_X86, RetCC_X86))
4391 return false;
4392 // The callee has to preserve all registers the caller needs to preserve.
4393 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4394 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4395 if (!CCMatch) {
4396 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4397 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4398 return false;
4399 }
4400
4401 unsigned StackArgsSize = 0;
4402
4403 // If the callee takes no arguments then go on to check the results of the
4404 // call.
4405 if (!Outs.empty()) {
4406 // Check if stack adjustment is needed. For now, do not do this if any
4407 // argument is passed on the stack.
4408 SmallVector<CCValAssign, 16> ArgLocs;
4409 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4410
4411 // Allocate shadow area for Win64
4412 if (IsCalleeWin64)
4413 CCInfo.AllocateStack(32, 8);
4414
4415 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4416 StackArgsSize = CCInfo.getNextStackOffset();
4417
4418 if (CCInfo.getNextStackOffset()) {
4419 // Check if the arguments are already laid out in the right way as
4420 // the caller's fixed stack objects.
4421 MachineFrameInfo &MFI = MF.getFrameInfo();
4422 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4423 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4424 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4425 CCValAssign &VA = ArgLocs[i];
4426 SDValue Arg = OutVals[i];
4427 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4428 if (VA.getLocInfo() == CCValAssign::Indirect)
4429 return false;
4430 if (!VA.isRegLoc()) {
4431 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4432 MFI, MRI, TII, VA))
4433 return false;
4434 }
4435 }
4436 }
4437
4438 bool PositionIndependent = isPositionIndependent();
4439 // If the tailcall address may be in a register, then make sure it's
4440 // possible to register allocate for it. In 32-bit, the call address can
4441 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4442 // callee-saved registers are restored. These happen to be the same
4443 // registers used to pass 'inreg' arguments so watch out for those.
4444 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4445 !isa<ExternalSymbolSDNode>(Callee)) ||
4446 PositionIndependent)) {
4447 unsigned NumInRegs = 0;
4448 // In PIC we need an extra register to formulate the address computation
4449 // for the callee.
4450 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4451
4452 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4453 CCValAssign &VA = ArgLocs[i];
4454 if (!VA.isRegLoc())
4455 continue;
4456 Register Reg = VA.getLocReg();
4457 switch (Reg) {
4458 default: break;
4459 case X86::EAX: case X86::EDX: case X86::ECX:
4460 if (++NumInRegs == MaxInRegs)
4461 return false;
4462 break;
4463 }
4464 }
4465 }
4466
4467 const MachineRegisterInfo &MRI = MF.getRegInfo();
4468 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4469 return false;
4470 }
4471
4472 bool CalleeWillPop =
4473 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4474 MF.getTarget().Options.GuaranteedTailCallOpt);
4475
4476 if (unsigned BytesToPop =
4477 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4478 // If we have bytes to pop, the callee must pop them.
4479 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4480 if (!CalleePopMatches)
4481 return false;
4482 } else if (CalleeWillPop && StackArgsSize > 0) {
4483 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4484 return false;
4485 }
4486
4487 return true;
4488}
4489
4490FastISel *
4491X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4492 const TargetLibraryInfo *libInfo) const {
4493 return X86::createFastISel(funcInfo, libInfo);
4494}
4495
4496//===----------------------------------------------------------------------===//
4497// Other Lowering Hooks
4498//===----------------------------------------------------------------------===//
4499
4500static bool MayFoldLoad(SDValue Op) {
4501 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4502}
4503
4504static bool MayFoldIntoStore(SDValue Op) {
4505 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4506}
4507
4508static bool MayFoldIntoZeroExtend(SDValue Op) {
4509 if (Op.hasOneUse()) {
4510 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4511 return (ISD::ZERO_EXTEND == Opcode);
4512 }
4513 return false;
4514}
4515
4516static bool isTargetShuffle(unsigned Opcode) {
4517 switch(Opcode) {
4518 default: return false;
4519 case X86ISD::BLENDI:
4520 case X86ISD::PSHUFB:
4521 case X86ISD::PSHUFD:
4522 case X86ISD::PSHUFHW:
4523 case X86ISD::PSHUFLW:
4524 case X86ISD::SHUFP:
4525 case X86ISD::INSERTPS:
4526 case X86ISD::EXTRQI:
4527 case X86ISD::INSERTQI:
4528 case X86ISD::PALIGNR:
4529 case X86ISD::VSHLDQ:
4530 case X86ISD::VSRLDQ:
4531 case X86ISD::MOVLHPS:
4532 case X86ISD::MOVHLPS:
4533 case X86ISD::MOVSHDUP:
4534 case X86ISD::MOVSLDUP:
4535 case X86ISD::MOVDDUP:
4536 case X86ISD::MOVSS:
4537 case X86ISD::MOVSD:
4538 case X86ISD::UNPCKL:
4539 case X86ISD::UNPCKH:
4540 case X86ISD::VBROADCAST:
4541 case X86ISD::VPERMILPI:
4542 case X86ISD::VPERMILPV:
4543 case X86ISD::VPERM2X128:
4544 case X86ISD::SHUF128:
4545 case X86ISD::VPERMIL2:
4546 case X86ISD::VPERMI:
4547 case X86ISD::VPPERM:
4548 case X86ISD::VPERMV:
4549 case X86ISD::VPERMV3:
4550 case X86ISD::VZEXT_MOVL:
4551 return true;
4552 }
4553}
4554
4555static bool isTargetShuffleVariableMask(unsigned Opcode) {
4556 switch (Opcode) {
4557 default: return false;
4558 // Target Shuffles.
4559 case X86ISD::PSHUFB:
4560 case X86ISD::VPERMILPV:
4561 case X86ISD::VPERMIL2:
4562 case X86ISD::VPPERM:
4563 case X86ISD::VPERMV:
4564 case X86ISD::VPERMV3:
4565 return true;
4566 // 'Faux' Target Shuffles.
4567 case ISD::OR:
4568 case ISD::AND:
4569 case X86ISD::ANDNP:
4570 return true;
4571 }
4572}
4573
4574SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4575 MachineFunction &MF = DAG.getMachineFunction();
4576 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4577 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4578 int ReturnAddrIndex = FuncInfo->getRAIndex();
4579
4580 if (ReturnAddrIndex == 0) {
4581 // Set up a frame object for the return address.
4582 unsigned SlotSize = RegInfo->getSlotSize();
4583 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4584 -(int64_t)SlotSize,
4585 false);
4586 FuncInfo->setRAIndex(ReturnAddrIndex);
4587 }
4588
4589 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4590}
4591
4592bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4593 bool hasSymbolicDisplacement) {
4594 // Offset should fit into 32 bit immediate field.
4595 if (!isInt<32>(Offset))
4596 return false;
4597
4598 // If we don't have a symbolic displacement - we don't have any extra
4599 // restrictions.
4600 if (!hasSymbolicDisplacement)
4601 return true;
4602
4603 // FIXME: Some tweaks might be needed for medium code model.
4604 if (M != CodeModel::Small && M != CodeModel::Kernel)
4605 return false;
4606
4607 // For small code model we assume that latest object is 16MB before end of 31
4608 // bits boundary. We may also accept pretty large negative constants knowing
4609 // that all objects are in the positive half of address space.
4610 if (M == CodeModel::Small && Offset < 16*1024*1024)
4611 return true;
4612
4613 // For kernel code model we know that all object resist in the negative half
4614 // of 32bits address space. We may not accept negative offsets, since they may
4615 // be just off and we may accept pretty large positive ones.
4616 if (M == CodeModel::Kernel && Offset >= 0)
4617 return true;
4618
4619 return false;
4620}
4621
4622/// Determines whether the callee is required to pop its own arguments.
4623/// Callee pop is necessary to support tail calls.
4624bool X86::isCalleePop(CallingConv::ID CallingConv,
4625 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4626 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4627 // can guarantee TCO.
4628 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4629 return true;
4630
4631 switch (CallingConv) {
4632 default:
4633 return false;
4634 case CallingConv::X86_StdCall:
4635 case CallingConv::X86_FastCall:
4636 case CallingConv::X86_ThisCall:
4637 case CallingConv::X86_VectorCall:
4638 return !is64Bit;
4639 }
4640}
4641
4642/// Return true if the condition is an signed comparison operation.
4643static bool isX86CCSigned(unsigned X86CC) {
4644 switch (X86CC) {
4645 default:
4646 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4646)
;
4647 case X86::COND_E:
4648 case X86::COND_NE:
4649 case X86::COND_B:
4650 case X86::COND_A:
4651 case X86::COND_BE:
4652 case X86::COND_AE:
4653 return false;
4654 case X86::COND_G:
4655 case X86::COND_GE:
4656 case X86::COND_L:
4657 case X86::COND_LE:
4658 return true;
4659 }
4660}
4661
4662static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4663 switch (SetCCOpcode) {
4664 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4664)
;
4665 case ISD::SETEQ: return X86::COND_E;
4666 case ISD::SETGT: return X86::COND_G;
4667 case ISD::SETGE: return X86::COND_GE;
4668 case ISD::SETLT: return X86::COND_L;
4669 case ISD::SETLE: return X86::COND_LE;
4670 case ISD::SETNE: return X86::COND_NE;
4671 case ISD::SETULT: return X86::COND_B;
4672 case ISD::SETUGT: return X86::COND_A;
4673 case ISD::SETULE: return X86::COND_BE;
4674 case ISD::SETUGE: return X86::COND_AE;
4675 }
4676}
4677
4678/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4679/// condition code, returning the condition code and the LHS/RHS of the
4680/// comparison to make.
4681static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4682 bool isFP, SDValue &LHS, SDValue &RHS,
4683 SelectionDAG &DAG) {
4684 if (!isFP) {
4685 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4686 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4687 // X > -1 -> X == 0, jump !sign.
4688 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4689 return X86::COND_NS;
4690 }
4691 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4692 // X < 0 -> X == 0, jump on sign.
4693 return X86::COND_S;
4694 }
4695 if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
4696 // X >= 0 -> X == 0, jump on !sign.
4697 return X86::COND_NS;
4698 }
4699 if (SetCCOpcode == ISD::SETLT && RHSC->getAPIntValue() == 1) {
4700 // X < 1 -> X <= 0
4701 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4702 return X86::COND_LE;
4703 }
4704 }
4705
4706 return TranslateIntegerX86CC(SetCCOpcode);
4707 }
4708
4709 // First determine if it is required or is profitable to flip the operands.
4710
4711 // If LHS is a foldable load, but RHS is not, flip the condition.
4712 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4713 !ISD::isNON_EXTLoad(RHS.getNode())) {
4714 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4715 std::swap(LHS, RHS);
4716 }
4717
4718 switch (SetCCOpcode) {
4719 default: break;
4720 case ISD::SETOLT:
4721 case ISD::SETOLE:
4722 case ISD::SETUGT:
4723 case ISD::SETUGE:
4724 std::swap(LHS, RHS);
4725 break;
4726 }
4727
4728 // On a floating point condition, the flags are set as follows:
4729 // ZF PF CF op
4730 // 0 | 0 | 0 | X > Y
4731 // 0 | 0 | 1 | X < Y
4732 // 1 | 0 | 0 | X == Y
4733 // 1 | 1 | 1 | unordered
4734 switch (SetCCOpcode) {
4735 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4735)
;
4736 case ISD::SETUEQ:
4737 case ISD::SETEQ: return X86::COND_E;
4738 case ISD::SETOLT: // flipped
4739 case ISD::SETOGT:
4740 case ISD::SETGT: return X86::COND_A;
4741 case ISD::SETOLE: // flipped
4742 case ISD::SETOGE:
4743 case ISD::SETGE: return X86::COND_AE;
4744 case ISD::SETUGT: // flipped
4745 case ISD::SETULT:
4746 case ISD::SETLT: return X86::COND_B;
4747 case ISD::SETUGE: // flipped
4748 case ISD::SETULE:
4749 case ISD::SETLE: return X86::COND_BE;
4750 case ISD::SETONE:
4751 case ISD::SETNE: return X86::COND_NE;
4752 case ISD::SETUO: return X86::COND_P;
4753 case ISD::SETO: return X86::COND_NP;
4754 case ISD::SETOEQ:
4755 case ISD::SETUNE: return X86::COND_INVALID;
4756 }
4757}
4758
4759/// Is there a floating point cmov for the specific X86 condition code?
4760/// Current x86 isa includes the following FP cmov instructions:
4761/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4762static bool hasFPCMov(unsigned X86CC) {
4763 switch (X86CC) {
4764 default:
4765 return false;
4766 case X86::COND_B:
4767 case X86::COND_BE:
4768 case X86::COND_E:
4769 case X86::COND_P:
4770 case X86::COND_A:
4771 case X86::COND_AE:
4772 case X86::COND_NE:
4773 case X86::COND_NP:
4774 return true;
4775 }
4776}
4777
4778
4779bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4780 const CallInst &I,
4781 MachineFunction &MF,
4782 unsigned Intrinsic) const {
4783
4784 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4785 if (!IntrData)
4786 return false;
4787
4788 Info.flags = MachineMemOperand::MONone;
4789 Info.offset = 0;
4790
4791 switch (IntrData->Type) {
4792 case TRUNCATE_TO_MEM_VI8:
4793 case TRUNCATE_TO_MEM_VI16:
4794 case TRUNCATE_TO_MEM_VI32: {
4795 Info.opc = ISD::INTRINSIC_VOID;
4796 Info.ptrVal = I.getArgOperand(0);
4797 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4798 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4799 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4800 ScalarVT = MVT::i8;
4801 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4802 ScalarVT = MVT::i16;
4803 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4804 ScalarVT = MVT::i32;
4805
4806 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4807 Info.align = Align::None();
4808 Info.flags |= MachineMemOperand::MOStore;
4809 break;
4810 }
4811 case GATHER:
4812 case GATHER_AVX2: {
4813 Info.opc = ISD::INTRINSIC_W_CHAIN;
4814 Info.ptrVal = nullptr;
4815 MVT DataVT = MVT::getVT(I.getType());
4816 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
4817 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
4818 IndexVT.getVectorNumElements());
4819 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
4820 Info.align = Align::None();
4821 Info.flags |= MachineMemOperand::MOLoad;
4822 break;
4823 }
4824 case SCATTER: {
4825 Info.opc = ISD::INTRINSIC_VOID;
4826 Info.ptrVal = nullptr;
4827 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
4828 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
4829 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
4830 IndexVT.getVectorNumElements());
4831 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
4832 Info.align = Align::None();
4833 Info.flags |= MachineMemOperand::MOStore;
4834 break;
4835 }
4836 default:
4837 return false;
4838 }
4839
4840 return true;
4841}
4842
4843/// Returns true if the target can instruction select the
4844/// specified FP immediate natively. If false, the legalizer will
4845/// materialize the FP immediate as a load from a constant pool.
4846bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4847 bool ForCodeSize) const {
4848 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4849 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4850 return true;
4851 }
4852 return false;
4853}
4854
4855bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4856 ISD::LoadExtType ExtTy,
4857 EVT NewVT) const {
4858 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")((cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow"
) ? static_cast<void> (0) : __assert_fail ("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4858, __PRETTY_FUNCTION__))
;
4859
4860 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4861 // relocation target a movq or addq instruction: don't let the load shrink.
4862 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4863 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4864 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4865 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4866
4867 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
4868 // those uses are extracted directly into a store, then the extract + store
4869 // can be store-folded. Therefore, it's probably not worth splitting the load.
4870 EVT VT = Load->getValueType(0);
4871 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
4872 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
4873 // Skip uses of the chain value. Result 0 of the node is the load value.
4874 if (UI.getUse().getResNo() != 0)
4875 continue;
4876
4877 // If this use is not an extract + store, it's probably worth splitting.
4878 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
4879 UI->use_begin()->getOpcode() != ISD::STORE)
4880 return true;
4881 }
4882 // All non-chain uses are extract + store.
4883 return false;
4884 }
4885
4886 return true;
4887}
4888
4889/// Returns true if it is beneficial to convert a load of a constant
4890/// to just the constant itself.
4891bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4892 Type *Ty) const {
4893 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4893, __PRETTY_FUNCTION__))
;
4894
4895 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4896 if (BitSize == 0 || BitSize > 64)
4897 return false;
4898 return true;
4899}
4900
4901bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
4902 // If we are using XMM registers in the ABI and the condition of the select is
4903 // a floating-point compare and we have blendv or conditional move, then it is
4904 // cheaper to select instead of doing a cross-register move and creating a
4905 // load that depends on the compare result.
4906 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
4907 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
4908}
4909
4910bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4911 // TODO: It might be a win to ease or lift this restriction, but the generic
4912 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4913 if (VT.isVector() && Subtarget.hasAVX512())
4914 return false;
4915
4916 return true;
4917}
4918
4919bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
4920 SDValue C) const {
4921 // TODO: We handle scalars using custom code, but generic combining could make
4922 // that unnecessary.
4923 APInt MulC;
4924 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
4925 return false;
4926
4927 // Find the type this will be legalized too. Otherwise we might prematurely
4928 // convert this to shl+add/sub and then still have to type legalize those ops.
4929 // Another choice would be to defer the decision for illegal types until
4930 // after type legalization. But constant splat vectors of i64 can't make it
4931 // through type legalization on 32-bit targets so we would need to special
4932 // case vXi64.
4933 while (getTypeAction(Context, VT) != TypeLegal)
4934 VT = getTypeToTransformTo(Context, VT);
4935
4936 // If vector multiply is legal, assume that's faster than shl + add/sub.
4937 // TODO: Multiply is a complex op with higher latency and lower throughput in
4938 // most implementations, so this check could be loosened based on type
4939 // and/or a CPU attribute.
4940 if (isOperationLegal(ISD::MUL, VT))
4941 return false;
4942
4943 // shl+add, shl+sub, shl+add+neg
4944 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
4945 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
4946}
4947
4948bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
4949 bool IsSigned) const {
4950 // f80 UINT_TO_FP is more efficient using Strict code if FCMOV is available.
4951 return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
4952}
4953
4954bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4955 unsigned Index) const {
4956 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4957 return false;
4958
4959 // Mask vectors support all subregister combinations and operations that
4960 // extract half of vector.
4961 if (ResVT.getVectorElementType() == MVT::i1)
4962 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4963 (Index == ResVT.getVectorNumElements()));
4964
4965 return (Index % ResVT.getVectorNumElements()) == 0;
4966}
4967
4968bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
4969 unsigned Opc = VecOp.getOpcode();
4970
4971 // Assume target opcodes can't be scalarized.
4972 // TODO - do we have any exceptions?
4973 if (Opc >= ISD::BUILTIN_OP_END)
4974 return false;
4975
4976 // If the vector op is not supported, try to convert to scalar.
4977 EVT VecVT = VecOp.getValueType();
4978 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
4979 return true;
4980
4981 // If the vector op is supported, but the scalar op is not, the transform may
4982 // not be worthwhile.
4983 EVT ScalarVT = VecVT.getScalarType();
4984 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
4985}
4986
4987bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
4988 // TODO: Allow vectors?
4989 if (VT.isVector())
4990 return false;
4991 return VT.isSimple() || !isOperationExpand(Opcode, VT);
4992}
4993
4994bool X86TargetLowering::isCheapToSpeculateCttz() const {
4995 // Speculate cttz only if we can directly use TZCNT.
4996 return Subtarget.hasBMI();
4997}
4998
4999bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5000 // Speculate ctlz only if we can directly use LZCNT.
5001 return Subtarget.hasLZCNT();
5002}
5003
5004bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5005 const SelectionDAG &DAG,
5006 const MachineMemOperand &MMO) const {
5007 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5008 BitcastVT.getVectorElementType() == MVT::i1)
5009 return false;
5010
5011 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5012 return false;
5013
5014 // If both types are legal vectors, it's always ok to convert them.
5015 if (LoadVT.isVector() && BitcastVT.isVector() &&
5016 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5017 return true;
5018
5019 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5020}
5021
5022bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5023 const SelectionDAG &DAG) const {
5024 // Do not merge to float value size (128 bytes) if no implicit
5025 // float attribute is set.
5026 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
5027 Attribute::NoImplicitFloat);
5028
5029 if (NoFloat) {
5030 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5031 return (MemVT.getSizeInBits() <= MaxIntSize);
5032 }
5033 // Make sure we don't merge greater than our preferred vector
5034 // width.
5035 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5036 return false;
5037 return true;
5038}
5039
5040bool X86TargetLowering::isCtlzFast() const {
5041 return Subtarget.hasFastLZCNT();
5042}
5043
5044bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5045 const Instruction &AndI) const {
5046 return true;
5047}
5048
5049bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5050 EVT VT = Y.getValueType();
5051
5052 if (VT.isVector())
5053 return false;
5054
5055 if (!Subtarget.hasBMI())
5056 return false;
5057
5058 // There are only 32-bit and 64-bit forms for 'andn'.
5059 if (VT != MVT::i32 && VT != MVT::i64)
5060 return false;
5061
5062 return !isa<ConstantSDNode>(Y);
5063}
5064
5065bool X86TargetLowering::hasAndNot(SDValue Y) const {
5066 EVT VT = Y.getValueType();
5067
5068 if (!VT.isVector())
5069 return hasAndNotCompare(Y);
5070
5071 // Vector.
5072
5073 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5074 return false;
5075
5076 if (VT == MVT::v4i32)
5077 return true;
5078
5079 return Subtarget.hasSSE2();
5080}
5081
5082bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5083 return X.getValueType().isScalarInteger(); // 'bt'
5084}
5085
5086bool X86TargetLowering::
5087 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5088 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5089 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5090 SelectionDAG &DAG) const {
5091 // Does baseline recommend not to perform the fold by default?
5092 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5093 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5094 return false;
5095 // For scalars this transform is always beneficial.
5096 if (X.getValueType().isScalarInteger())
5097 return true;
5098 // If all the shift amounts are identical, then transform is beneficial even
5099 // with rudimentary SSE2 shifts.
5100 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5101 return true;
5102 // If we have AVX2 with it's powerful shift operations, then it's also good.
5103 if (Subtarget.hasAVX2())
5104 return true;
5105 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5106 return NewShiftOpcode == ISD::SHL;
5107}
5108
5109bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5110 const SDNode *N, CombineLevel Level) const {
5111 assert(((N->getOpcode() == ISD::SHL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5115, __PRETTY_FUNCTION__))
5112 N->getOperand(0).getOpcode() == ISD::SRL) ||((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5115, __PRETTY_FUNCTION__))
5113 (N->getOpcode() == ISD::SRL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5115, __PRETTY_FUNCTION__))
5114 N->getOperand(0).getOpcode() == ISD::SHL)) &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5115, __PRETTY_FUNCTION__))
5115 "Expected shift-shift mask")((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5115, __PRETTY_FUNCTION__))
;
5116 EVT VT = N->getValueType(0);
5117 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5118 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5119 // Only fold if the shift values are equal - so it folds to AND.
5120 // TODO - we should fold if either is a non-uniform vector but we don't do
5121 // the fold for non-splats yet.
5122 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5123 }
5124 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5125}
5126
5127bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5128 EVT VT = Y.getValueType();
5129
5130 // For vectors, we don't have a preference, but we probably want a mask.
5131 if (VT.isVector())
5132 return false;
5133
5134 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5135 if (VT == MVT::i64 && !Subtarget.is64Bit())
5136 return false;
5137
5138 return true;
5139}
5140
5141bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5142 SDNode *N) const {
5143 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5144 !Subtarget.isOSWindows())
5145 return false;
5146 return true;
5147}
5148
5149bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5150 // Any legal vector type can be splatted more efficiently than
5151 // loading/spilling from memory.
5152 return isTypeLegal(VT);
5153}
5154
5155MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5156 MVT VT = MVT::getIntegerVT(NumBits);
5157 if (isTypeLegal(VT))
5158 return VT;
5159
5160 // PMOVMSKB can handle this.
5161 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5162 return MVT::v16i8;
5163
5164 // VPMOVMSKB can handle this.
5165 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5166 return MVT::v32i8;
5167
5168 // TODO: Allow 64-bit type for 32-bit target.
5169 // TODO: 512-bit types should be allowed, but make sure that those
5170 // cases are handled in combineVectorSizedSetCCEquality().
5171
5172 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5173}
5174
5175/// Val is the undef sentinel value or equal to the specified value.
5176static bool isUndefOrEqual(int Val, int CmpVal) {
5177 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5178}
5179
5180/// Val is either the undef or zero sentinel value.
5181static bool isUndefOrZero(int Val) {
5182 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5183}
5184
5185/// Return true if every element in Mask, beginning from position Pos and ending
5186/// in Pos+Size is the undef sentinel value.
5187static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5188 return llvm::all_of(Mask.slice(Pos, Size),
5189 [](int M) { return M == SM_SentinelUndef; });
5190}
5191
5192/// Return true if the mask creates a vector whose lower half is undefined.
5193static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5194 unsigned NumElts = Mask.size();
5195 return isUndefInRange(Mask, 0, NumElts / 2);
5196}
5197
5198/// Return true if the mask creates a vector whose upper half is undefined.
5199static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5200 unsigned NumElts = Mask.size();
5201 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5202}
5203
/// Return true if Val falls within the half-open range [Low, Hi), i.e.
/// Low <= Val < Hi.
static bool isInRange(int Val, int Low, int Hi) {
  if (Val < Low)
    return false;
  return Val < Hi;
}
5208
5209/// Return true if the value of any element in Mask falls within the specified
5210/// range (L, H].
5211static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5212 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5213}
5214
5215/// Return true if Val is undef or if its value falls within the
5216/// specified range (L, H].
5217static bool isUndefOrInRange(int Val, int Low, int Hi) {
5218 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5219}
5220
5221/// Return true if every element in Mask is undef or if its value
5222/// falls within the specified range (L, H].
5223static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5224 return llvm::all_of(
5225 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5226}
5227
5228/// Return true if Val is undef, zero or if its value falls within the
5229/// specified range (L, H].
5230static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5231 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5232}
5233
5234/// Return true if every element in Mask is undef, zero or if its value
5235/// falls within the specified range (L, H].
5236static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5237 return llvm::all_of(
5238 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5239}
5240
5241/// Return true if every element in Mask, beginning
5242/// from position Pos and ending in Pos + Size, falls within the specified
5243/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5244static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5245 unsigned Size, int Low, int Step = 1) {
5246 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5247 if (!isUndefOrEqual(Mask[i], Low))
5248 return false;
5249 return true;
5250}
5251
5252/// Return true if every element in Mask, beginning
5253/// from position Pos and ending in Pos+Size, falls within the specified
5254/// sequential range (Low, Low+Size], or is undef or is zero.
5255static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5256 unsigned Size, int Low,
5257 int Step = 1) {
5258 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5259 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5260 return false;
5261 return true;
5262}
5263
5264/// Return true if every element in Mask, beginning
5265/// from position Pos and ending in Pos+Size is undef or is zero.
5266static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5267 unsigned Size) {
5268 return llvm::all_of(Mask.slice(Pos, Size),
5269 [](int M) { return isUndefOrZero(M); });
5270}
5271
5272/// Helper function to test whether a shuffle mask could be
5273/// simplified by widening the elements being shuffled.
5274///
5275/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5276/// leaves it in an unspecified state.
5277///
5278/// NOTE: This must handle normal vector shuffle masks and *target* vector
5279/// shuffle masks. The latter have the special property of a '-2' representing
5280/// a zero-ed lane of a vector.
5281static bool canWidenShuffleElements(ArrayRef<int> Mask,
5282 SmallVectorImpl<int> &WidenedMask) {
5283 WidenedMask.assign(Mask.size() / 2, 0);
5284 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5285 int M0 = Mask[i];
5286 int M1 = Mask[i + 1];
5287
5288 // If both elements are undef, its trivial.
5289 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5290 WidenedMask[i / 2] = SM_SentinelUndef;
5291 continue;
5292 }
5293
5294 // Check for an undef mask and a mask value properly aligned to fit with
5295 // a pair of values. If we find such a case, use the non-undef mask's value.
5296 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5297 WidenedMask[i / 2] = M1 / 2;
5298 continue;
5299 }
5300 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5301 WidenedMask[i / 2] = M0 / 2;
5302 continue;
5303 }
5304
5305 // When zeroing, we need to spread the zeroing across both lanes to widen.
5306 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5307 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5308 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5309 WidenedMask[i / 2] = SM_SentinelZero;
5310 continue;
5311 }
5312 return false;
5313 }
5314
5315 // Finally check if the two mask values are adjacent and aligned with
5316 // a pair.
5317 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5318 WidenedMask[i / 2] = M0 / 2;
5319 continue;
5320 }
5321
5322 // Otherwise we can't safely widen the elements used in this shuffle.
5323 return false;
5324 }
5325 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5326, __PRETTY_FUNCTION__))
5326 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5326, __PRETTY_FUNCTION__))
;
5327
5328 return true;
5329}
5330
5331static bool canWidenShuffleElements(ArrayRef<int> Mask,
5332 const APInt &Zeroable,
5333 bool V2IsZero,
5334 SmallVectorImpl<int> &WidenedMask) {
5335 // Create an alternative mask with info about zeroable elements.
5336 // Here we do not set undef elements as zeroable.
5337 SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
5338 if (V2IsZero) {
5339 assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!")((!Zeroable.isNullValue() && "V2's non-undef elements are used?!"
) ? static_cast<void> (0) : __assert_fail ("!Zeroable.isNullValue() && \"V2's non-undef elements are used?!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5339, __PRETTY_FUNCTION__))
;
5340 for (int i = 0, Size = Mask.size(); i != Size; ++i)
5341 if (Mask[i] != SM_SentinelUndef && Zeroable[i])
5342 ZeroableMask[i] = SM_SentinelZero;
5343 }
5344 return canWidenShuffleElements(ZeroableMask, WidenedMask);
5345}
5346
5347static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5348 SmallVector<int, 32> WidenedMask;
5349 return canWidenShuffleElements(Mask, WidenedMask);
5350}
5351
5352/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5353bool X86::isZeroNode(SDValue Elt) {
5354 return isNullConstant(Elt) || isNullFPConstant(Elt);
5355}
5356
5357// Build a vector of constants.
5358// Use an UNDEF node if MaskElt == -1.
5359// Split 64-bit constants in the 32-bit mode.
5360static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5361 const SDLoc &dl, bool IsMask = false) {
5362
5363 SmallVector<SDValue, 32> Ops;
5364 bool Split = false;
5365
5366 MVT ConstVecVT = VT;
5367 unsigned NumElts = VT.getVectorNumElements();
5368 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5369 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5370 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5371 Split = true;
5372 }
5373
5374 MVT EltVT = ConstVecVT.getVectorElementType();
5375 for (unsigned i = 0; i < NumElts; ++i) {
5376 bool IsUndef = Values[i] < 0 && IsMask;
5377 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5378 DAG.getConstant(Values[i], dl, EltVT);
5379 Ops.push_back(OpNode);
5380 if (Split)
5381 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5382 DAG.getConstant(0, dl, EltVT));
5383 }
5384 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5385 if (Split)
5386 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5387 return ConstsNode;
5388}
5389
5390static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5391 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5392 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5393, __PRETTY_FUNCTION__))
5393 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5393, __PRETTY_FUNCTION__))
;
5394 SmallVector<SDValue, 32> Ops;
5395 bool Split = false;
5396
5397 MVT ConstVecVT = VT;
5398 unsigned NumElts = VT.getVectorNumElements();
5399 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5400 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5401 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5402 Split = true;
5403 }
5404
5405 MVT EltVT = ConstVecVT.getVectorElementType();
5406 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5407 if (Undefs[i]) {
5408 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5409 continue;
5410 }
5411 const APInt &V = Bits[i];
5412 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5412, __PRETTY_FUNCTION__))
;
5413 if (Split) {
5414 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5415 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5416 } else if (EltVT == MVT::f32) {
5417 APFloat FV(APFloat::IEEEsingle(), V);
5418 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5419 } else if (EltVT == MVT::f64) {
5420 APFloat FV(APFloat::IEEEdouble(), V);
5421 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5422 } else {
5423 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5424 }
5425 }
5426
5427 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5428 return DAG.getBitcast(VT, ConstsNode);
5429}
5430
5431/// Returns a vector of specified type with all zero elements.
5432static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5433 SelectionDAG &DAG, const SDLoc &dl) {
5434 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5436, __PRETTY_FUNCTION__))
5435 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5436, __PRETTY_FUNCTION__))
5436 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5436, __PRETTY_FUNCTION__))
;
5437
5438 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5439 // type. This ensures they get CSE'd. But if the integer type is not
5440 // available, use a floating-point +0.0 instead.
5441 SDValue Vec;
5442 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5443 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5444 } else if (VT.isFloatingPoint()) {
5445 Vec = DAG.getConstantFP(+0.0, dl, VT);
5446 } else if (VT.getVectorElementType() == MVT::i1) {
5447 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5448, __PRETTY_FUNCTION__))
5448 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5448, __PRETTY_FUNCTION__))
;
5449 Vec = DAG.getConstant(0, dl, VT);
5450 } else {
5451 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5452 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5453 }
5454 return DAG.getBitcast(VT, Vec);
5455}
5456
5457static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5458 const SDLoc &dl, unsigned vectorWidth) {
5459 EVT VT = Vec.getValueType();
5460 EVT ElVT = VT.getVectorElementType();
5461 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5462 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5463 VT.getVectorNumElements()/Factor);
5464
5465 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5466 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5467 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5467, __PRETTY_FUNCTION__))
;
5468
5469 // This is the index of the first element of the vectorWidth-bit chunk
5470 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5471 IdxVal &= ~(ElemsPerChunk - 1);
5472
5473 // If the input is a buildvector just emit a smaller one.
5474 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5475 return DAG.getBuildVector(ResultVT, dl,
5476 Vec->ops().slice(IdxVal, ElemsPerChunk));
5477
5478 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5479 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5480}
5481
5482/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5483/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5484/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5485/// instructions or a simple subregister reference. Idx is an index in the
5486/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5487/// lowering EXTRACT_VECTOR_ELT operations easier.
5488static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5489 SelectionDAG &DAG, const SDLoc &dl) {
5490 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5491, __PRETTY_FUNCTION__))
5491 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5491, __PRETTY_FUNCTION__))
;
5492 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5493}
5494
5495/// Generate a DAG to grab 256-bits from a 512-bit vector.
5496static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5497 SelectionDAG &DAG, const SDLoc &dl) {
5498 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5498, __PRETTY_FUNCTION__))
;
5499 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5500}
5501
5502static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5503 SelectionDAG &DAG, const SDLoc &dl,
5504 unsigned vectorWidth) {
5505 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5506, __PRETTY_FUNCTION__))
5506 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5506, __PRETTY_FUNCTION__))
;
5507 // Inserting UNDEF is Result
5508 if (Vec.isUndef())
5509 return Result;
5510 EVT VT = Vec.getValueType();
5511 EVT ElVT = VT.getVectorElementType();
5512 EVT ResultVT = Result.getValueType();
5513
5514 // Insert the relevant vectorWidth bits.
5515 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5516 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5516, __PRETTY_FUNCTION__))
;
5517
5518 // This is the index of the first element of the vectorWidth-bit chunk
5519 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5520 IdxVal &= ~(ElemsPerChunk - 1);
5521
5522 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5523 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5524}
5525
5526/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5527/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5528/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5529/// simple superregister reference. Idx is an index in the 128 bits
5530/// we want. It need not be aligned to a 128-bit boundary. That makes
5531/// lowering INSERT_VECTOR_ELT operations easier.
5532static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5533 SelectionDAG &DAG, const SDLoc &dl) {
5534 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5534, __PRETTY_FUNCTION__))
;
5535 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5536}
5537
5538/// Widen a vector to a larger size with the same scalar type, with the new
5539/// elements either zero or undef.
5540static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5541 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5542 const SDLoc &dl) {
5543 assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5545, __PRETTY_FUNCTION__))
5544 Vec.getValueType().getScalarType() == VT.getScalarType() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5545, __PRETTY_FUNCTION__))
5545 "Unsupported vector widening type")((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5545, __PRETTY_FUNCTION__))
;
5546 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5547 : DAG.getUNDEF(VT);
5548 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5549 DAG.getIntPtrConstant(0, dl));
5550}
5551
5552/// Widen a vector to a larger size with the same scalar type, with the new
5553/// elements either zero or undef.
5554static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
5555 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5556 const SDLoc &dl, unsigned WideSizeInBits) {
5557 assert(Vec.getValueSizeInBits() < WideSizeInBits &&((Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits
% Vec.getScalarValueSizeInBits()) == 0 && "Unsupported vector widening type"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5559, __PRETTY_FUNCTION__))
5558 (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&((Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits
% Vec.getScalarValueSizeInBits()) == 0 && "Unsupported vector widening type"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5559, __PRETTY_FUNCTION__))
5559 "Unsupported vector widening type")((Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits
% Vec.getScalarValueSizeInBits()) == 0 && "Unsupported vector widening type"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5559, __PRETTY_FUNCTION__))
;
5560 unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits();
5561 MVT SVT = Vec.getSimpleValueType().getScalarType();
5562 MVT VT = MVT::getVectorVT(SVT, WideNumElts);
5563 return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
5564}
5565
5566// Helper function to collect subvector ops that are concated together,
5567// either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series.
5568// The subvectors in Ops are guaranteed to be the same type.
5569static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
5570 assert(Ops.empty() && "Expected an empty ops vector")((Ops.empty() && "Expected an empty ops vector") ? static_cast
<void> (0) : __assert_fail ("Ops.empty() && \"Expected an empty ops vector\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5570, __PRETTY_FUNCTION__))
;
5571
5572 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
5573 Ops.append(N->op_begin(), N->op_end());
5574 return true;
5575 }
5576
5577 if (N->getOpcode() == ISD::INSERT_SUBVECTOR &&
5578 isa<ConstantSDNode>(N->getOperand(2))) {
5579 SDValue Src = N->getOperand(0);
5580 SDValue Sub = N->getOperand(1);
5581 const APInt &Idx = N->getConstantOperandAPInt(2);
5582 EVT VT = Src.getValueType();
5583 EVT SubVT = Sub.getValueType();
5584
5585 // TODO - Handle more general insert_subvector chains.
5586 if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
5587 Idx == (VT.getVectorNumElements() / 2) &&
5588 Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
5589 Src.getOperand(1).getValueType() == SubVT &&
5590 isNullConstant(Src.getOperand(2))) {
5591 Ops.push_back(Src.getOperand(1));
5592 Ops.push_back(Sub);
5593 return true;
5594 }
5595 }
5596
5597 return false;
5598}
5599
5600// Helper for splitting operands of an operation to legal target size and
5601// apply a function on each part.
5602// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5603// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
5604// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5605// The argument Builder is a function that will be applied on each split part:
5606// SDValue Builder(SelectionDAG&G, SDLoc, ArrayRef<SDValue>)
5607template <typename F>
5608SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5609 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
5610 F Builder, bool CheckBWI = true) {
5611 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2")((Subtarget.hasSSE2() && "Target assumed to support at least SSE2"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasSSE2() && \"Target assumed to support at least SSE2\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5611, __PRETTY_FUNCTION__))
;
5612 unsigned NumSubs = 1;
5613 if ((CheckBWI && Subtarget.useBWIRegs()) ||
5614 (!CheckBWI && Subtarget.useAVX512Regs())) {
5615 if (VT.getSizeInBits() > 512) {
5616 NumSubs = VT.getSizeInBits() / 512;
5617 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 512) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 512) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5617, __PRETTY_FUNCTION__))
;
5618 }
5619 } else if (Subtarget.hasAVX2()) {
5620 if (VT.getSizeInBits() > 256) {
5621 NumSubs = VT.getSizeInBits() / 256;
5622 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 256) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 256) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5622, __PRETTY_FUNCTION__))
;
5623 }
5624 } else {
5625 if (VT.getSizeInBits() > 128) {
5626 NumSubs = VT.getSizeInBits() / 128;
5627 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 128) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 128) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5627, __PRETTY_FUNCTION__))
;
5628 }
5629 }
5630
5631 if (NumSubs == 1)
5632 return Builder(DAG, DL, Ops);
5633
5634 SmallVector<SDValue, 4> Subs;
5635 for (unsigned i = 0; i != NumSubs; ++i) {
5636 SmallVector<SDValue, 2> SubOps;
5637 for (SDValue Op : Ops) {
5638 EVT OpVT = Op.getValueType();
5639 unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
5640 unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
5641 SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
5642 }
5643 Subs.push_back(Builder(DAG, DL, SubOps));
5644 }
5645 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5646}
5647
5648/// Insert i1-subvector to i1-vector.
5649static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5650 const X86Subtarget &Subtarget) {
5651
5652 SDLoc dl(Op);
5653 SDValue Vec = Op.getOperand(0);
5654 SDValue SubVec = Op.getOperand(1);
5655 SDValue Idx = Op.getOperand(2);
5656
5657 if (!isa<ConstantSDNode>(Idx))
5658 return SDValue();
5659
5660 // Inserting undef is a nop. We can just return the original vector.
5661 if (SubVec.isUndef())
5662 return Vec;
5663
5664 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5665 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5666 return Op;
5667
5668 MVT OpVT = Op.getSimpleValueType();
5669 unsigned NumElems = OpVT.getVectorNumElements();
5670
5671 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5672
5673 // Extend to natively supported kshift.
5674 MVT WideOpVT = OpVT;
5675 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5676 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5677
5678 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5679 // if necessary.
5680 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5681 // May need to promote to a legal type.
5682 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5683 DAG.getConstant(0, dl, WideOpVT),
5684 SubVec, Idx);
5685 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5686 }
5687
5688 MVT SubVecVT = SubVec.getSimpleValueType();
5689 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5690
5691 assert(IdxVal + SubVecNumElems <= NumElems &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5693, __PRETTY_FUNCTION__))
5692 IdxVal % SubVecVT.getSizeInBits() == 0 &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5693, __PRETTY_FUNCTION__))
5693 "Unexpected index value in INSERT_SUBVECTOR")((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5693, __PRETTY_FUNCTION__))
;
5694
5695 SDValue Undef = DAG.getUNDEF(WideOpVT);
5696
5697 if (IdxVal == 0) {
5698 // Zero lower bits of the Vec
5699 SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
5700 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
5701 ZeroIdx);
5702 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5703 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5704 // Merge them together, SubVec should be zero extended.
5705 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5706 DAG.getConstant(0, dl, WideOpVT),
5707 SubVec, ZeroIdx);
5708 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5709 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5710 }
5711
5712 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5713 Undef, SubVec, ZeroIdx);
5714
5715 if (Vec.isUndef()) {
5716 assert(IdxVal != 0 && "Unexpected index")((IdxVal != 0 && "Unexpected index") ? static_cast<
void> (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5716, __PRETTY_FUNCTION__))
;
5717 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5718 DAG.getTargetConstant(IdxVal, dl, MVT::i8));
5719 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5720 }
5721
5722 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5723 assert(IdxVal != 0 && "Unexpected index")((IdxVal != 0 && "Unexpected index") ? static_cast<
void> (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5723, __PRETTY_FUNCTION__))
;
5724 NumElems = WideOpVT.getVectorNumElements();
5725 unsigned ShiftLeft = NumElems - SubVecNumElems;
5726 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5727 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5728 DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
5729 if (ShiftRight != 0)
5730 SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
5731 DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
5732 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5733 }
5734
5735 // Simple case when we put subvector in the upper part
5736 if (IdxVal + SubVecNumElems == NumElems) {
5737 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5738 DAG.getTargetConstant(IdxVal, dl, MVT::i8));
5739 if (SubVecNumElems * 2 == NumElems) {
5740 // Special case, use legal zero extending insert_subvector. This allows
5741 // isel to opimitize when bits are known zero.
5742 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
5743 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5744 DAG.getConstant(0, dl, WideOpVT),
5745 Vec, ZeroIdx);
5746 } else {
5747 // Otherwise use explicit shifts to zero the bits.
5748 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5749 Undef, Vec, ZeroIdx);
5750 NumElems = WideOpVT.getVectorNumElements();
5751 SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
5752 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5753 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5754 }
5755 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5756 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5757 }
5758
5759 // Inserting into the middle is more complicated.
5760
5761 NumElems = WideOpVT.getVectorNumElements();
5762
5763 // Widen the vector if needed.
5764 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5765
5766 // Clear the upper bits of the subvector and move it to its insert position.
5767 unsigned ShiftLeft = NumElems - SubVecNumElems;
5768 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5769 DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
5770 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5771 SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
5772 DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
5773
5774 // Isolate the bits below the insertion point.
5775 unsigned LowShift = NumElems - IdxVal;
5776 SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec,
5777 DAG.getTargetConstant(LowShift, dl, MVT::i8));
5778 Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low,
5779 DAG.getTargetConstant(LowShift, dl, MVT::i8));
5780
5781 // Isolate the bits after the last inserted bit.
5782 unsigned HighShift = IdxVal + SubVecNumElems;
5783 SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5784 DAG.getTargetConstant(HighShift, dl, MVT::i8));
5785 High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High,
5786 DAG.getTargetConstant(HighShift, dl, MVT::i8));
5787
5788 // Now OR all 3 pieces together.
5789 Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High);
5790 SubVec = DAG.getNode(ISD::OR, dl, WideOpVT, SubVec, Vec);
5791
5792 // Reduce to original width if needed.
5793 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5794}
5795
5796static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG,
5797 const SDLoc &dl) {
5798 assert(V1.getValueType() == V2.getValueType() && "subvector type mismatch")((V1.getValueType() == V2.getValueType() && "subvector type mismatch"
) ? static_cast<void> (0) : __assert_fail ("V1.getValueType() == V2.getValueType() && \"subvector type mismatch\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5798, __PRETTY_FUNCTION__))
;
5799 EVT SubVT = V1.getValueType();
5800 EVT SubSVT = SubVT.getScalarType();
5801 unsigned SubNumElts = SubVT.getVectorNumElements();
5802 unsigned SubVectorWidth = SubVT.getSizeInBits();
5803 EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts);
5804 SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth);
5805 return insertSubVector(V, V2, SubNumElts, DAG, dl, SubVectorWidth);
5806}
5807
5808/// Returns a vector of specified type with all bits set.
5809/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5810/// Then bitcast to their original type, ensuring they get CSE'd.
5811static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5812 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5813, __PRETTY_FUNCTION__))
5813 "Expected a 128/256/512-bit vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5813, __PRETTY_FUNCTION__))
;
5814
5815 APInt Ones = APInt::getAllOnesValue(32);
5816 unsigned NumElts = VT.getSizeInBits() / 32;
5817 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5818 return DAG.getBitcast(VT, Vec);
5819}
5820
5821// Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode.
5822static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) {
5823 switch (Opcode) {
5824 case ISD::ANY_EXTEND:
5825 case ISD::ANY_EXTEND_VECTOR_INREG:
5826 return ISD::ANY_EXTEND_VECTOR_INREG;
5827 case ISD::ZERO_EXTEND:
5828 case ISD::ZERO_EXTEND_VECTOR_INREG:
5829 return ISD::ZERO_EXTEND_VECTOR_INREG;
5830 case ISD::SIGN_EXTEND:
5831 case ISD::SIGN_EXTEND_VECTOR_INREG:
5832 return ISD::SIGN_EXTEND_VECTOR_INREG;
5833 }
5834 llvm_unreachable("Unknown opcode")::llvm::llvm_unreachable_internal("Unknown opcode", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5834)
;
5835}
5836
5837static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT,
5838 SDValue In, SelectionDAG &DAG) {
5839 EVT InVT = In.getValueType();
5840 assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.")((VT.isVector() && InVT.isVector() && "Expected vector VTs."
) ? static_cast<void> (0) : __assert_fail ("VT.isVector() && InVT.isVector() && \"Expected vector VTs.\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5840, __PRETTY_FUNCTION__))
;
5841 assert((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||(((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
ISD::ZERO_EXTEND == Opcode) && "Unknown extension opcode"
) ? static_cast<void> (0) : __assert_fail ("(ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode || ISD::ZERO_EXTEND == Opcode) && \"Unknown extension opcode\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5843, __PRETTY_FUNCTION__))
5842 ISD::ZERO_EXTEND == Opcode) &&(((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
ISD::ZERO_EXTEND == Opcode) && "Unknown extension opcode"
) ? static_cast<void> (0) : __assert_fail ("(ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode || ISD::ZERO_EXTEND == Opcode) && \"Unknown extension opcode\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5843, __PRETTY_FUNCTION__))
5843 "Unknown extension opcode")(((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
ISD::ZERO_EXTEND == Opcode) && "Unknown extension opcode"
) ? static_cast<void> (0) : __assert_fail ("(ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode || ISD::ZERO_EXTEND == Opcode) && \"Unknown extension opcode\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5843, __PRETTY_FUNCTION__))
;
5844
5845 // For 256-bit vectors, we only need the lower (128-bit) input half.
5846 // For 512-bit vectors, we only need the lower input half or quarter.
5847 if (InVT.getSizeInBits() > 128) {
5848 assert(VT.getSizeInBits() == InVT.getSizeInBits() &&((VT.getSizeInBits() == InVT.getSizeInBits() && "Expected VTs to be the same size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getSizeInBits() == InVT.getSizeInBits() && \"Expected VTs to be the same size!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5849, __PRETTY_FUNCTION__))
5849 "Expected VTs to be the same size!")((VT.getSizeInBits() == InVT.getSizeInBits() && "Expected VTs to be the same size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getSizeInBits() == InVT.getSizeInBits() && \"Expected VTs to be the same size!\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5849, __PRETTY_FUNCTION__))
;
5850 unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();