Bug Summary

File: llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 7291, column 7
Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fdenormal-fp-math=ieee,ieee -fdenormal-fp-math-f32=ieee,ieee -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e=. -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-02-25-045343-43954-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp

/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "Utils/X86ShuffleDecode.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86InstrBuilder.h"
19#include "X86IntrinsicsInfo.h"
20#include "X86MachineFunctionInfo.h"
21#include "X86TargetMachine.h"
22#include "X86TargetObjectFile.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringExtras.h"
27#include "llvm/ADT/StringSwitch.h"
28#include "llvm/Analysis/BlockFrequencyInfo.h"
29#include "llvm/Analysis/EHPersonalities.h"
30#include "llvm/Analysis/ProfileSummaryInfo.h"
31#include "llvm/Analysis/VectorUtils.h"
32#include "llvm/CodeGen/IntrinsicLowering.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineJumpTableInfo.h"
37#include "llvm/CodeGen/MachineModuleInfo.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/TargetLowering.h"
40#include "llvm/CodeGen/WinEHFuncInfo.h"
41#include "llvm/IR/CallSite.h"
42#include "llvm/IR/CallingConv.h"
43#include "llvm/IR/Constants.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/DiagnosticInfo.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/GlobalAlias.h"
48#include "llvm/IR/GlobalVariable.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Intrinsics.h"
51#include "llvm/MC/MCAsmInfo.h"
52#include "llvm/MC/MCContext.h"
53#include "llvm/MC/MCExpr.h"
54#include "llvm/MC/MCSymbol.h"
55#include "llvm/Support/CommandLine.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/ErrorHandling.h"
58#include "llvm/Support/KnownBits.h"
59#include "llvm/Support/MathExtras.h"
60#include "llvm/Target/TargetOptions.h"
61#include <algorithm>
62#include <bitset>
63#include <cctype>
64#include <numeric>
65using namespace llvm;
66
// Debug-type tag for this file; statistics declared in this file are
// registered under this name.
#define DEBUG_TYPE "x86-isel"
68
69STATISTIC(NumTailCalls, "Number of tail calls")static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls"
, "Number of tail calls"}
;
70
71static cl::opt<int> ExperimentalPrefLoopAlignment(
72 "x86-experimental-pref-loop-alignment", cl::init(4),
73 cl::desc(
74 "Sets the preferable loop alignment for experiments (as log2 bytes)"
75 "(the last x86-experimental-pref-loop-alignment bits"
76 " of the loop header PC will be 0)."),
77 cl::Hidden);
78
79// Added in 10.0.
80static cl::opt<bool> EnableOldKNLABI(
81 "x86-enable-old-knl-abi", cl::init(false),
82 cl::desc("Enables passing v32i16 and v64i8 in 2 YMM registers instead of "
83 "one ZMM register on AVX512F, but not AVX512BW targets."),
84 cl::Hidden);
85
86static cl::opt<bool> MulConstantOptimization(
87 "mul-constant-optimization", cl::init(true),
88 cl::desc("Replace 'mul x, Const' with more effective instructions like "
89 "SHIFT, LEA, etc."),
90 cl::Hidden);
91
92static cl::opt<bool> ExperimentalUnorderedISEL(
93 "x86-experimental-unordered-atomic-isel", cl::init(false),
94 cl::desc("Use LoadSDNode and StoreSDNode instead of "
95 "AtomicSDNode for unordered atomic loads and "
96 "stores respectively."),
97 cl::Hidden);
98
99/// Call this when the user attempts to do something unsupported, like
100/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
101/// report_fatal_error, so calling code should attempt to recover without
102/// crashing.
103static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
104 const char *Msg) {
105 MachineFunction &MF = DAG.getMachineFunction();
106 DAG.getContext()->diagnose(
107 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
108}
109
110X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
111 const X86Subtarget &STI)
112 : TargetLowering(TM), Subtarget(STI) {
113 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
114 X86ScalarSSEf64 = Subtarget.hasSSE2();
115 X86ScalarSSEf32 = Subtarget.hasSSE1();
116 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
117
118 // Set up the TargetLowering object.
119
120 // X86 is weird. It always uses i8 for shift amounts and setcc results.
121 setBooleanContents(ZeroOrOneBooleanContent);
122 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
123 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
124
125 // For 64-bit, since we have so many registers, use the ILP scheduler.
126 // For 32-bit, use the register pressure specific scheduling.
127 // For Atom, always use ILP scheduling.
128 if (Subtarget.isAtom())
129 setSchedulingPreference(Sched::ILP);
130 else if (Subtarget.is64Bit())
131 setSchedulingPreference(Sched::ILP);
132 else
133 setSchedulingPreference(Sched::RegPressure);
134 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
135 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
136
137 // Bypass expensive divides and use cheaper ones.
138 if (TM.getOptLevel() >= CodeGenOpt::Default) {
139 if (Subtarget.hasSlowDivide32())
140 addBypassSlowDiv(32, 8);
141 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
142 addBypassSlowDiv(64, 32);
143 }
144
145 if (Subtarget.isTargetWindowsMSVC() ||
146 Subtarget.isTargetWindowsItanium()) {
147 // Setup Windows compiler runtime calls.
148 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
149 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
150 setLibcallName(RTLIB::SREM_I64, "_allrem");
151 setLibcallName(RTLIB::UREM_I64, "_aullrem");
152 setLibcallName(RTLIB::MUL_I64, "_allmul");
153 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
154 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
155 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
156 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
157 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
158 }
159
160 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
161 // MSVCRT doesn't have powi; fall back to pow
162 setLibcallName(RTLIB::POWI_F32, nullptr);
163 setLibcallName(RTLIB::POWI_F64, nullptr);
164 }
165
166 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
167 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
168 // FIXME: Should we be limiting the atomic size on other configs? Default is
169 // 1024.
170 if (!Subtarget.hasCmpxchg8b())
171 setMaxAtomicSizeInBitsSupported(32);
172
173 // Set up the register classes.
174 addRegisterClass(MVT::i8, &X86::GR8RegClass);
175 addRegisterClass(MVT::i16, &X86::GR16RegClass);
176 addRegisterClass(MVT::i32, &X86::GR32RegClass);
177 if (Subtarget.is64Bit())
178 addRegisterClass(MVT::i64, &X86::GR64RegClass);
179
180 for (MVT VT : MVT::integer_valuetypes())
181 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
182
183 // We don't accept any truncstore of integer registers.
184 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
185 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
186 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
187 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
188 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
189 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
190
191 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
192
193 // SETOEQ and SETUNE require checking two conditions.
194 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
195 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
196 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
197 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
198 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
199 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
200
201 // Integer absolute.
202 if (Subtarget.hasCMov()) {
203 setOperationAction(ISD::ABS , MVT::i16 , Custom);
204 setOperationAction(ISD::ABS , MVT::i32 , Custom);
205 }
206 setOperationAction(ISD::ABS , MVT::i64 , Custom);
207
208 // Funnel shifts.
209 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
210 setOperationAction(ShiftOp , MVT::i16 , Custom);
211 setOperationAction(ShiftOp , MVT::i32 , Custom);
212 if (Subtarget.is64Bit())
213 setOperationAction(ShiftOp , MVT::i64 , Custom);
214 }
215
216 if (!Subtarget.useSoftFloat()) {
217 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
218 // operation.
219 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
220 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
221 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
222 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
223 // We have an algorithm for SSE2, and we turn this into a 64-bit
224 // FILD or VCVTUSI2SS/SD for other targets.
225 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
226 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
227 // We have an algorithm for SSE2->double, and we turn this into a
228 // 64-bit FILD followed by conditional FADD for other targets.
229 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
230 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
231
232 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
233 // this operation.
234 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
235 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
236 // SSE has no i16 to fp conversion, only i32. We promote in the handler
237 // to allow f80 to use i16 and f64 to use i16 with sse1 only
238 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
239 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
240 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
241 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
242 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
243 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
244 // are Legal, f80 is custom lowered.
245 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
246 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
247
248 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
249 // this operation.
250 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
251 // FIXME: This doesn't generate invalid exception when it should. PR44019.
252 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
253 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
254 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
255 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
256 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
257 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
258 // are Legal, f80 is custom lowered.
259 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
260 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
261
262 // Handle FP_TO_UINT by promoting the destination to a larger signed
263 // conversion.
264 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
265 // FIXME: This doesn't generate invalid exception when it should. PR44019.
266 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
267 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
268 // FIXME: This doesn't generate invalid exception when it should. PR44019.
269 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
270 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
271 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
272 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
273 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
274
275 setOperationAction(ISD::LRINT, MVT::f32, Custom);
276 setOperationAction(ISD::LRINT, MVT::f64, Custom);
277 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
278 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
279
280 if (!Subtarget.is64Bit()) {
281 setOperationAction(ISD::LRINT, MVT::i64, Custom);
282 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
283 }
284 }
285
286 // Handle address space casts between mixed sized pointers.
287 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
288 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
289
290 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
291 if (!X86ScalarSSEf64) {
292 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
293 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
294 if (Subtarget.is64Bit()) {
295 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
296 // Without SSE, i64->f64 goes through memory.
297 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
298 }
299 } else if (!Subtarget.is64Bit())
300 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
301
302 // Scalar integer divide and remainder are lowered to use operations that
303 // produce two results, to match the available instructions. This exposes
304 // the two-result form to trivial CSE, which is able to combine x/y and x%y
305 // into a single instruction.
306 //
307 // Scalar integer multiply-high is also lowered to use two-result
308 // operations, to match the available instructions. However, plain multiply
309 // (low) operations are left as Legal, as there are single-result
310 // instructions for this in x86. Using the two-result multiply instructions
311 // when both high and low results are needed must be arranged by dagcombine.
312 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
313 setOperationAction(ISD::MULHS, VT, Expand);
314 setOperationAction(ISD::MULHU, VT, Expand);
315 setOperationAction(ISD::SDIV, VT, Expand);
316 setOperationAction(ISD::UDIV, VT, Expand);
317 setOperationAction(ISD::SREM, VT, Expand);
318 setOperationAction(ISD::UREM, VT, Expand);
319 }
320
321 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
322 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
323 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
324 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
325 setOperationAction(ISD::BR_CC, VT, Expand);
326 setOperationAction(ISD::SELECT_CC, VT, Expand);
327 }
328 if (Subtarget.is64Bit())
329 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
330 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
331 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
332 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
333
334 setOperationAction(ISD::FREM , MVT::f32 , Expand);
335 setOperationAction(ISD::FREM , MVT::f64 , Expand);
336 setOperationAction(ISD::FREM , MVT::f80 , Expand);
337 setOperationAction(ISD::FREM , MVT::f128 , Expand);
338 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
339
340 // Promote the i8 variants and force them on up to i32 which has a shorter
341 // encoding.
342 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
343 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
344 if (!Subtarget.hasBMI()) {
345 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
346 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
347 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
348 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
349 if (Subtarget.is64Bit()) {
350 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
351 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
352 }
353 }
354
355 if (Subtarget.hasLZCNT()) {
356 // When promoting the i8 variants, force them to i32 for a shorter
357 // encoding.
358 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
359 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
360 } else {
361 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
362 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
363 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
364 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
365 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
366 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
367 if (Subtarget.is64Bit()) {
368 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
369 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
370 }
371 }
372
373 // Special handling for half-precision floating point conversions.
374 // If we don't have F16C support, then lower half float conversions
375 // into library calls.
376 if (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) {
377 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
378 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
379 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
380 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
381 } else {
382 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
383 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Expand);
384 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
385 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Expand);
386 }
387
388 // There's never any support for operations beyond MVT::f32.
389 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
390 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
391 setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
392 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
393 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f80, Expand);
394 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f128, Expand);
395 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
396 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
397 setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
398 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Expand);
399 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f80, Expand);
400 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f128, Expand);
401
402 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
403 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
404 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
405 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
406 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
407 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
408 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
409 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
410
411 if (Subtarget.hasPOPCNT()) {
412 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
413 } else {
414 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
415 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
416 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
417 if (Subtarget.is64Bit())
418 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
419 else
420 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
421 }
422
423 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
424
425 if (!Subtarget.hasMOVBE())
426 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
427
428 // X86 wants to expand cmov itself.
429 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
430 setOperationAction(ISD::SELECT, VT, Custom);
431 setOperationAction(ISD::SETCC, VT, Custom);
432 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
433 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
434 }
435 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
436 if (VT == MVT::i64 && !Subtarget.is64Bit())
437 continue;
438 setOperationAction(ISD::SELECT, VT, Custom);
439 setOperationAction(ISD::SETCC, VT, Custom);
440 }
441
442 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
443 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
444 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
445
446 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
447 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
448 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
449 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
450 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
451 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
452 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
453 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
454
455 // Darwin ABI issue.
456 for (auto VT : { MVT::i32, MVT::i64 }) {
457 if (VT == MVT::i64 && !Subtarget.is64Bit())
458 continue;
459 setOperationAction(ISD::ConstantPool , VT, Custom);
460 setOperationAction(ISD::JumpTable , VT, Custom);
461 setOperationAction(ISD::GlobalAddress , VT, Custom);
462 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
463 setOperationAction(ISD::ExternalSymbol , VT, Custom);
464 setOperationAction(ISD::BlockAddress , VT, Custom);
465 }
466
467 // 64-bit shl, sra, srl (iff 32-bit x86)
468 for (auto VT : { MVT::i32, MVT::i64 }) {
469 if (VT == MVT::i64 && !Subtarget.is64Bit())
470 continue;
471 setOperationAction(ISD::SHL_PARTS, VT, Custom);
472 setOperationAction(ISD::SRA_PARTS, VT, Custom);
473 setOperationAction(ISD::SRL_PARTS, VT, Custom);
474 }
475
476 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
477 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
478
479 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
480
481 // Expand certain atomics
482 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
483 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
484 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
485 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
486 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
487 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
488 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
489 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
490 }
491
492 if (!Subtarget.is64Bit())
493 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
494
495 if (Subtarget.hasCmpxchg16b()) {
496 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
497 }
498
499 // FIXME - use subtarget debug flags
500 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
501 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
502 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
503 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
504 }
505
506 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
507 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
508
509 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
510 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
511
512 setOperationAction(ISD::TRAP, MVT::Other, Legal);
513 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
514
515 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
516 setOperationAction(ISD::VASTART , MVT::Other, Custom);
517 setOperationAction(ISD::VAEND , MVT::Other, Expand);
518 bool Is64Bit = Subtarget.is64Bit();
519 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
520 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
521
522 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
523 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
524
525 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
526
527 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
528 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
529 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
530
531 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
532 // f32 and f64 use SSE.
533 // Set up the FP register classes.
534 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
535 : &X86::FR32RegClass);
536 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
537 : &X86::FR64RegClass);
538
539 // Disable f32->f64 extload as we can only generate this in one instruction
540 // under optsize. So its easier to pattern match (fpext (load)) for that
541 // case instead of needing to emit 2 instructions for extload in the
542 // non-optsize case.
543 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
544
545 for (auto VT : { MVT::f32, MVT::f64 }) {
546 // Use ANDPD to simulate FABS.
547 setOperationAction(ISD::FABS, VT, Custom);
548
549 // Use XORP to simulate FNEG.
550 setOperationAction(ISD::FNEG, VT, Custom);
551
552 // Use ANDPD and ORPD to simulate FCOPYSIGN.
553 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
554
555 // These might be better off as horizontal vector ops.
556 setOperationAction(ISD::FADD, VT, Custom);
557 setOperationAction(ISD::FSUB, VT, Custom);
558
559 // We don't support sin/cos/fmod
560 setOperationAction(ISD::FSIN , VT, Expand);
561 setOperationAction(ISD::FCOS , VT, Expand);
562 setOperationAction(ISD::FSINCOS, VT, Expand);
563 }
564
565 // Lower this to MOVMSK plus an AND.
566 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
567 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
568
569 } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
570 // Use SSE for f32, x87 for f64.
571 // Set up the FP register classes.
572 addRegisterClass(MVT::f32, &X86::FR32RegClass);
573 if (UseX87)
574 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
575
576 // Use ANDPS to simulate FABS.
577 setOperationAction(ISD::FABS , MVT::f32, Custom);
578
579 // Use XORP to simulate FNEG.
580 setOperationAction(ISD::FNEG , MVT::f32, Custom);
581
582 if (UseX87)
583 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
584
585 // Use ANDPS and ORPS to simulate FCOPYSIGN.
586 if (UseX87)
587 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
588 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
589
590 // We don't support sin/cos/fmod
591 setOperationAction(ISD::FSIN , MVT::f32, Expand);
592 setOperationAction(ISD::FCOS , MVT::f32, Expand);
593 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
594
595 if (UseX87) {
596 // Always expand sin/cos functions even though x87 has an instruction.
597 setOperationAction(ISD::FSIN, MVT::f64, Expand);
598 setOperationAction(ISD::FCOS, MVT::f64, Expand);
599 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
600 }
601 } else if (UseX87) {
602 // f32 and f64 in x87.
603 // Set up the FP register classes.
604 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
605 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
606
607 for (auto VT : { MVT::f32, MVT::f64 }) {
608 setOperationAction(ISD::UNDEF, VT, Expand);
609 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
610
611 // Always expand sin/cos functions even though x87 has an instruction.
612 setOperationAction(ISD::FSIN , VT, Expand);
613 setOperationAction(ISD::FCOS , VT, Expand);
614 setOperationAction(ISD::FSINCOS, VT, Expand);
615 }
616 }
617
618 // Expand FP32 immediates into loads from the stack, save special cases.
619 if (isTypeLegal(MVT::f32)) {
620 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
621 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
622 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
623 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
624 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
625 } else // SSE immediates.
626 addLegalFPImmediate(APFloat(+0.0f)); // xorps
627 }
628 // Expand FP64 immediates into loads from the stack, save special cases.
629 if (isTypeLegal(MVT::f64)) {
630 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
631 addLegalFPImmediate(APFloat(+0.0)); // FLD0
632 addLegalFPImmediate(APFloat(+1.0)); // FLD1
633 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
634 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
635 } else // SSE immediates.
636 addLegalFPImmediate(APFloat(+0.0)); // xorpd
637 }
638 // Handle constrained floating-point operations of scalar.
639 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
640 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
641 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
642 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
643 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
644 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
645 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
646 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
647 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
648 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
649 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
650 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
651 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
652
653 // We don't support FMA.
654 setOperationAction(ISD::FMA, MVT::f64, Expand);
655 setOperationAction(ISD::FMA, MVT::f32, Expand);
656
657 // f80 always uses X87.
658 if (UseX87) {
659 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
660 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
661 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
662 {
663 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
664 addLegalFPImmediate(TmpFlt); // FLD0
665 TmpFlt.changeSign();
666 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
667
668 bool ignored;
669 APFloat TmpFlt2(+1.0);
670 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
671 &ignored);
672 addLegalFPImmediate(TmpFlt2); // FLD1
673 TmpFlt2.changeSign();
674 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
675 }
676
677 // Always expand sin/cos functions even though x87 has an instruction.
678 setOperationAction(ISD::FSIN , MVT::f80, Expand);
679 setOperationAction(ISD::FCOS , MVT::f80, Expand);
680 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
681
682 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
683 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
684 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
685 setOperationAction(ISD::FRINT, MVT::f80, Expand);
686 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
687 setOperationAction(ISD::FMA, MVT::f80, Expand);
688 setOperationAction(ISD::LROUND, MVT::f80, Expand);
689 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
690 setOperationAction(ISD::LRINT, MVT::f80, Custom);
691 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
692
693 // Handle constrained floating-point operations of scalar.
694 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
695 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
696 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
697 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
698 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
699 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
700 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
701 // as Custom.
702 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
703 }
704
705 // f128 uses xmm registers, but most operations require libcalls.
706 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
707 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
708 : &X86::VR128RegClass);
709
710 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
711
712 setOperationAction(ISD::FADD, MVT::f128, LibCall);
713 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
714 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
715 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
716 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
717 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
718 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
719 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
720 setOperationAction(ISD::FMA, MVT::f128, LibCall);
721 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
722
723 setOperationAction(ISD::FABS, MVT::f128, Custom);
724 setOperationAction(ISD::FNEG, MVT::f128, Custom);
725 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
726
727 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
728 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
729 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
730 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
731 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
732 // No STRICT_FSINCOS
733 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
734 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
735
736 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
737 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
738 // We need to custom handle any FP_ROUND with an f128 input, but
739 // LegalizeDAG uses the result type to know when to run a custom handler.
740 // So we have to list all legal floating point result types here.
741 if (isTypeLegal(MVT::f32)) {
742 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
743 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
744 }
745 if (isTypeLegal(MVT::f64)) {
746 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
747 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
748 }
749 if (isTypeLegal(MVT::f80)) {
750 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
751 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
752 }
753
754 setOperationAction(ISD::SETCC, MVT::f128, Custom);
755
756 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
757 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
758 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
759 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
760 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
761 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
762 }
763
764 // Always use a library call for pow.
765 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
766 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
767 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
768 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
769
770 setOperationAction(ISD::FLOG, MVT::f80, Expand);
771 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
772 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
773 setOperationAction(ISD::FEXP, MVT::f80, Expand);
774 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
775 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
776 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
777
778 // Some FP actions are always expanded for vector types.
779 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
780 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
781 setOperationAction(ISD::FSIN, VT, Expand);
782 setOperationAction(ISD::FSINCOS, VT, Expand);
783 setOperationAction(ISD::FCOS, VT, Expand);
784 setOperationAction(ISD::FREM, VT, Expand);
785 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
786 setOperationAction(ISD::FPOW, VT, Expand);
787 setOperationAction(ISD::FLOG, VT, Expand);
788 setOperationAction(ISD::FLOG2, VT, Expand);
789 setOperationAction(ISD::FLOG10, VT, Expand);
790 setOperationAction(ISD::FEXP, VT, Expand);
791 setOperationAction(ISD::FEXP2, VT, Expand);
792 }
793
794 // First set operation action for all vector types to either promote
795 // (for widening) or expand (for scalarization). Then we will selectively
796 // turn on ones that can be effectively codegen'd.
797 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
798 setOperationAction(ISD::SDIV, VT, Expand);
799 setOperationAction(ISD::UDIV, VT, Expand);
800 setOperationAction(ISD::SREM, VT, Expand);
801 setOperationAction(ISD::UREM, VT, Expand);
802 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
803 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
804 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
805 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
806 setOperationAction(ISD::FMA, VT, Expand);
807 setOperationAction(ISD::FFLOOR, VT, Expand);
808 setOperationAction(ISD::FCEIL, VT, Expand);
809 setOperationAction(ISD::FTRUNC, VT, Expand);
810 setOperationAction(ISD::FRINT, VT, Expand);
811 setOperationAction(ISD::FNEARBYINT, VT, Expand);
812 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
813 setOperationAction(ISD::MULHS, VT, Expand);
814 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
815 setOperationAction(ISD::MULHU, VT, Expand);
816 setOperationAction(ISD::SDIVREM, VT, Expand);
817 setOperationAction(ISD::UDIVREM, VT, Expand);
818 setOperationAction(ISD::CTPOP, VT, Expand);
819 setOperationAction(ISD::CTTZ, VT, Expand);
820 setOperationAction(ISD::CTLZ, VT, Expand);
821 setOperationAction(ISD::ROTL, VT, Expand);
822 setOperationAction(ISD::ROTR, VT, Expand);
823 setOperationAction(ISD::BSWAP, VT, Expand);
824 setOperationAction(ISD::SETCC, VT, Expand);
825 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
826 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
827 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
828 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
829 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
830 setOperationAction(ISD::TRUNCATE, VT, Expand);
831 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
832 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
833 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
834 setOperationAction(ISD::SELECT_CC, VT, Expand);
835 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
836 setTruncStoreAction(InnerVT, VT, Expand);
837
838 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
839 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
840
841 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
842 // types, we have to deal with them whether we ask for Expansion or not.
843 // Setting Expand causes its own optimisation problems though, so leave
844 // them legal.
845 if (VT.getVectorElementType() == MVT::i1)
846 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
847
848 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
849 // split/scalarized right now.
850 if (VT.getVectorElementType() == MVT::f16)
851 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
852 }
853 }
854
855 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
856 // with -msoft-float, disable use of MMX as well.
857 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
858 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
859 // No operations on x86mmx supported, everything uses intrinsics.
860 }
861
862 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
863 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
864 : &X86::VR128RegClass);
865
866 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
867 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
868 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
869 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
870 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
871 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
872 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
873 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
874
875 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
876 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
877
878 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
879 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
880 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
881 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
882 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
883 }
884
885 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
886 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
887 : &X86::VR128RegClass);
888
889 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
890 // registers cannot be used even for integer operations.
891 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
892 : &X86::VR128RegClass);
893 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
894 : &X86::VR128RegClass);
895 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
896 : &X86::VR128RegClass);
897 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
898 : &X86::VR128RegClass);
899
900 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
901 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
902 setOperationAction(ISD::SDIV, VT, Custom);
903 setOperationAction(ISD::SREM, VT, Custom);
904 setOperationAction(ISD::UDIV, VT, Custom);
905 setOperationAction(ISD::UREM, VT, Custom);
906 }
907
908 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
909 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
910 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
911
912 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
913 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
914 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
915 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
916 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
917 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
918 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
919 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
920 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
921 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
922 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
923 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
924 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
925
926 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
927 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
928 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
929 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
930 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
931 }
932
933 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
934 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
935 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
936 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
937 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
938 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
939 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
940 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
941 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
942 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
943 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
944 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
945
946 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
947 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
948 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
949
950 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
951 setOperationAction(ISD::SETCC, VT, Custom);
952 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
953 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
954 setOperationAction(ISD::CTPOP, VT, Custom);
955 setOperationAction(ISD::ABS, VT, Custom);
956
957 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
958 // setcc all the way to isel and prefer SETGT in some isel patterns.
959 setCondCodeAction(ISD::SETLT, VT, Custom);
960 setCondCodeAction(ISD::SETLE, VT, Custom);
961 }
962
963 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
964 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
965 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
966 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
967 setOperationAction(ISD::VSELECT, VT, Custom);
968 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
969 }
970
971 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
972 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
973 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
974 setOperationAction(ISD::VSELECT, VT, Custom);
975
976 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
977 continue;
978
979 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
980 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
981 }
982
983 // Custom lower v2i64 and v2f64 selects.
984 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
985 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
986 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
987 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
988 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
989
990 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
991 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
992 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
993 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
994
995 // Custom legalize these to avoid over promotion or custom promotion.
996 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
997 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
998 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
999 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1000 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1001 }
1002
1003 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1004 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
1005 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1006 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1007
1008 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1009 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1010
1011 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1012 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1013
1014 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1015 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1016 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1017 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1018 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1019
1020 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1021 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1022 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1023 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1024
1025 // We want to legalize this to an f64 load rather than an i64 load on
1026 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1027 // store.
1028 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1029 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1030 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1031 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1032 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1033 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1034
1035 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1036 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1037 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1038 if (!Subtarget.hasAVX512())
1039 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1040
1041 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1042 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1043 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1044
1045 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1046
1047 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1048 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1049 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1050 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1051 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1052 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1053
1054 // In the customized shift lowering, the legal v4i32/v2i64 cases
1055 // in AVX2 will be recognized.
1056 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1057 setOperationAction(ISD::SRL, VT, Custom);
1058 setOperationAction(ISD::SHL, VT, Custom);
1059 setOperationAction(ISD::SRA, VT, Custom);
1060 }
1061
1062 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1063 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1064
1065 // With AVX512, expanding (and promoting the shifts) is better.
1066 if (!Subtarget.hasAVX512())
1067 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1068
1069 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1070 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1071 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1072 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1073 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1074 }
1075
1076 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1077 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1078 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1079 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1080 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1081 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1082 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1083 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1084 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1085
1086 // These might be better off as horizontal vector ops.
1087 setOperationAction(ISD::ADD, MVT::i16, Custom);
1088 setOperationAction(ISD::ADD, MVT::i32, Custom);
1089 setOperationAction(ISD::SUB, MVT::i16, Custom);
1090 setOperationAction(ISD::SUB, MVT::i32, Custom);
1091 }
1092
1093 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1094 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1095 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1096 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1097 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1098 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1099 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1100 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1101 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1102 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1103 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1104 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1105
1106 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1107 }
1108
1109 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1110 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1111 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1112 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1113 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1114 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1115 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1116 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1117
1118 // FIXME: Do we need to handle scalar-to-vector here?
1119 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1120
1121 // We directly match byte blends in the backend as they match the VSELECT
1122 // condition form.
1123 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1124
1125 // SSE41 brings specific instructions for doing vector sign extend even in
1126 // cases where we don't have SRA.
1127 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1128 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1129 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1130 }
1131
1132 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1133 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1134 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1135 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1136 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1137 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1138 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1139 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1140 }
1141
1142 // i8 vectors are custom because the source register and source
1143 // source memory operand types are not the same width.
1144 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1145
1146 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1147 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1148 // do the pre and post work in the vector domain.
1149 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1150 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1151 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1152 // so that DAG combine doesn't try to turn it into uint_to_fp.
1153 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1154 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1155 }
1156 }
1157
1158 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1159 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1160 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1161 setOperationAction(ISD::ROTL, VT, Custom);
1162
1163 // XOP can efficiently perform BITREVERSE with VPPERM.
1164 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1165 setOperationAction(ISD::BITREVERSE, VT, Custom);
1166
1167 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1168 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1169 setOperationAction(ISD::BITREVERSE, VT, Custom);
1170 }
1171
1172 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1173 bool HasInt256 = Subtarget.hasInt256();
1174
1175 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1176 : &X86::VR256RegClass);
1177 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1178 : &X86::VR256RegClass);
1179 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1180 : &X86::VR256RegClass);
1181 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1182 : &X86::VR256RegClass);
1183 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1184 : &X86::VR256RegClass);
1185 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1186 : &X86::VR256RegClass);
1187
1188 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1189 setOperationAction(ISD::FFLOOR, VT, Legal);
1190 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1191 setOperationAction(ISD::FCEIL, VT, Legal);
1192 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1193 setOperationAction(ISD::FTRUNC, VT, Legal);
1194 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1195 setOperationAction(ISD::FRINT, VT, Legal);
1196 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1197 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1198 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1199
1200 setOperationAction(ISD::FROUND, VT, Custom);
1201
1202 setOperationAction(ISD::FNEG, VT, Custom);
1203 setOperationAction(ISD::FABS, VT, Custom);
1204 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1205 }
1206
1207 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1208 // even though v8i16 is a legal type.
1209 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1210 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1211 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1212 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1213 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1214 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
1215
1216 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1217 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1218
1219 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1220 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1221 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1222 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1223 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1224 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1225 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1226 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1227 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1228 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1229 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1230 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1231
1232 if (!Subtarget.hasAVX512())
1233 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1234
1235 // In the customized shift lowering, the legal v8i32/v4i64 cases
1236 // in AVX2 will be recognized.
1237 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1238 setOperationAction(ISD::SRL, VT, Custom);
1239 setOperationAction(ISD::SHL, VT, Custom);
1240 setOperationAction(ISD::SRA, VT, Custom);
1241 }
1242
1243 // These types need custom splitting if their input is a 128-bit vector.
1244 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1245 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1246 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1247 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1248
1249 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1250 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1251
1252 // With BWI, expanding (and promoting the shifts) is better.
1253 if (!Subtarget.hasBWI())
1254 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1255
1256 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1257 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1258 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1259 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1260 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1261 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1262
1263 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1264 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1265 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1266 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1267 }
1268
1269 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1270 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1271 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1272 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1273
1274 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1275 setOperationAction(ISD::SETCC, VT, Custom);
1276 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1277 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1278 setOperationAction(ISD::CTPOP, VT, Custom);
1279 setOperationAction(ISD::CTLZ, VT, Custom);
1280
1281 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1282 // setcc all the way to isel and prefer SETGT in some isel patterns.
1283 setCondCodeAction(ISD::SETLT, VT, Custom);
1284 setCondCodeAction(ISD::SETLE, VT, Custom);
1285 }
1286
1287 if (Subtarget.hasAnyFMA()) {
1288 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1289 MVT::v2f64, MVT::v4f64 }) {
1290 setOperationAction(ISD::FMA, VT, Legal);
1291 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1292 }
1293 }
1294
1295 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1296 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1297 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1298 }
1299
1300 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1301 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1302 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1303 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1304
1305 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1306 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1307 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1308 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1309 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1310 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1311
1312 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1313 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1314 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1315 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1316 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1317
1318 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1319 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1320 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1321 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1322 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1323 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1324 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1325 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1326
1327 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1328 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1329 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1330 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1331 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1332 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1333 }
1334
1335 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1336 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1337 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1338 }
1339
1340 if (HasInt256) {
1341 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1342 // when we have a 256bit-wide blend with immediate.
1343 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1344 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1345
1346 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1347 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1348 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1349 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1350 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1351 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1352 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1353 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1354 }
1355 }
1356
1357 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1358 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1359 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1360 setOperationAction(ISD::MSTORE, VT, Legal);
1361 }
1362
1363 // Extract subvector is special because the value type
1364 // (result) is 128-bit but the source is 256-bit wide.
1365 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1366 MVT::v4f32, MVT::v2f64 }) {
1367 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1368 }
1369
1370 // Custom lower several nodes for 256-bit types.
1371 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1372 MVT::v8f32, MVT::v4f64 }) {
1373 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1374 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1375 setOperationAction(ISD::VSELECT, VT, Custom);
1376 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1377 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1378 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1379 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1380 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1381 setOperationAction(ISD::STORE, VT, Custom);
1382 }
1383
1384 if (HasInt256) {
1385 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1386
1387 // Custom legalize 2x32 to get a little better code.
1388 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1389 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1390
1391 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1392 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1393 setOperationAction(ISD::MGATHER, VT, Custom);
1394 }
1395 }
1396
1397 // This block controls legalization of the mask vector sizes that are
1398 // available with AVX512. 512-bit vectors are in a separate block controlled
1399 // by useAVX512Regs.
1400 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1401 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1402 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1403 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1404 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1405 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1406
1407 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1408 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1409 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1410
1411 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1412 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1413 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1414 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1415 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1416 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1417 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1418 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1419 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1420 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1421 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1422 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1423
1424 // There is no byte sized k-register load or store without AVX512DQ.
1425 if (!Subtarget.hasDQI()) {
1426 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1427 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1428 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1429 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1430
1431 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1432 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1433 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1434 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1435 }
1436
1437 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1438 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1439 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1440 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1441 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1442 }
1443
1444 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1445 setOperationAction(ISD::ADD, VT, Custom);
1446 setOperationAction(ISD::SUB, VT, Custom);
1447 setOperationAction(ISD::MUL, VT, Custom);
1448 setOperationAction(ISD::UADDSAT, VT, Custom);
1449 setOperationAction(ISD::SADDSAT, VT, Custom);
1450 setOperationAction(ISD::USUBSAT, VT, Custom);
1451 setOperationAction(ISD::SSUBSAT, VT, Custom);
1452 setOperationAction(ISD::VSELECT, VT, Expand);
1453 }
1454
1455 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1456 setOperationAction(ISD::SETCC, VT, Custom);
1457 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1458 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1459 setOperationAction(ISD::SELECT, VT, Custom);
1460 setOperationAction(ISD::TRUNCATE, VT, Custom);
1461
1462 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1463 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1464 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1465 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1466 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1467 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1468 }
1469
1470 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1471 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1472 }
1473
1474 // This block controls legalization for 512-bit operations with 32/64 bit
1475 // elements. 512-bits can be disabled based on prefer-vector-width and
1476 // required-vector-width function attributes.
1477 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1478 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1479 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1480 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1481 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1482
1483 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1484 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1485 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1486 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1487 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1488 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1489 }
1490
1491 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1492 setOperationAction(ISD::FNEG, VT, Custom);
1493 setOperationAction(ISD::FABS, VT, Custom);
1494 setOperationAction(ISD::FMA, VT, Legal);
1495 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1496 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1497 }
1498
1499 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1500 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1501 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1502 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1503 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1504 }
1505 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1506 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1507 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1508 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1509 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1510 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1511 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1512 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1513
1514 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1515 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1516 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1517 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1518 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1519 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1520 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1521 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1522 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1523 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1524 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1525 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1526
1527 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1528 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1529 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1530 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1531 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1532
1533 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1534 // to 512-bit rather than use the AVX2 instructions so that we can use
1535 // k-masks.
1536 if (!Subtarget.hasVLX()) {
1537 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1538 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1539 setOperationAction(ISD::MLOAD, VT, Custom);
1540 setOperationAction(ISD::MSTORE, VT, Custom);
1541 }
1542 }
1543
1544 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1545 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1546 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1547 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1548 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1549 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1550 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1551 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1552
1553 // Need to custom widen this if we don't have AVX512BW.
1554 setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
1555 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
1556 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
1557
1558 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1559 setOperationAction(ISD::FFLOOR, VT, Legal);
1560 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1561 setOperationAction(ISD::FCEIL, VT, Legal);
1562 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1563 setOperationAction(ISD::FTRUNC, VT, Legal);
1564 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1565 setOperationAction(ISD::FRINT, VT, Legal);
1566 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1567 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1568 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1569
1570 setOperationAction(ISD::FROUND, VT, Custom);
1571
1572 setOperationAction(ISD::SELECT, VT, Custom);
1573 }
1574
1575 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1576 for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
1577 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1578 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1579 }
1580
1581 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1582 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1583 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1584 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1585
1586 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1587 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1588
1589 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1590 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1591
1592 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1593 setOperationAction(ISD::SMAX, VT, Legal);
1594 setOperationAction(ISD::UMAX, VT, Legal);
1595 setOperationAction(ISD::SMIN, VT, Legal);
1596 setOperationAction(ISD::UMIN, VT, Legal);
1597 setOperationAction(ISD::ABS, VT, Legal);
1598 setOperationAction(ISD::SRL, VT, Custom);
1599 setOperationAction(ISD::SHL, VT, Custom);
1600 setOperationAction(ISD::SRA, VT, Custom);
1601 setOperationAction(ISD::CTPOP, VT, Custom);
1602 setOperationAction(ISD::ROTL, VT, Custom);
1603 setOperationAction(ISD::ROTR, VT, Custom);
1604 setOperationAction(ISD::SETCC, VT, Custom);
1605 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1606 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1607 setOperationAction(ISD::SELECT, VT, Custom);
1608
1609 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1610 // setcc all the way to isel and prefer SETGT in some isel patterns.
1611 setCondCodeAction(ISD::SETLT, VT, Custom);
1612 setCondCodeAction(ISD::SETLE, VT, Custom);
1613 }
1614
1615 if (Subtarget.hasDQI()) {
1616 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1617 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1618 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1619 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1620 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1621 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1622 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1623 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1624
1625 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1626 }
1627
1628 if (Subtarget.hasCDI()) {
1629 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1630 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1631 setOperationAction(ISD::CTLZ, VT, Legal);
1632 }
1633 } // Subtarget.hasCDI()
1634
1635 if (Subtarget.hasVPOPCNTDQ()) {
1636 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1637 setOperationAction(ISD::CTPOP, VT, Legal);
1638 }
1639
1640 // Extract subvector is special because the value type
1641 // (result) is 256-bit but the source is 512-bit wide.
1642 // 128-bit was made Legal under AVX1.
1643 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1644 MVT::v8f32, MVT::v4f64 })
1645 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1646
1647 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1648 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1649 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1650 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1651 setOperationAction(ISD::VSELECT, VT, Custom);
1652 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1653 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1654 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1655 setOperationAction(ISD::MLOAD, VT, Legal);
1656 setOperationAction(ISD::MSTORE, VT, Legal);
1657 setOperationAction(ISD::MGATHER, VT, Custom);
1658 setOperationAction(ISD::MSCATTER, VT, Custom);
1659 }
1660 if (!Subtarget.hasBWI()) {
1661 // Need to custom split v32i16/v64i8 bitcasts.
1662 setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
1663 setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
1664
1665 // Better to split these into two 256-bit ops.
1666 setOperationAction(ISD::BITREVERSE, MVT::v8i64, Custom);
1667 setOperationAction(ISD::BITREVERSE, MVT::v16i32, Custom);
1668 }
1669
1670 if (Subtarget.hasVBMI2()) {
1671 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1672 setOperationAction(ISD::FSHL, VT, Custom);
1673 setOperationAction(ISD::FSHR, VT, Custom);
1674 }
1675 }
1676 }// has AVX-512
1677
1678 // This block controls legalization for operations that don't have
1679 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1680 // narrower widths.
1681 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1682 // These operations are handled on non-VLX by artificially widening in
1683 // isel patterns.
1684
1685 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1686 Subtarget.hasVLX() ? Legal : Custom);
1687 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1688 Subtarget.hasVLX() ? Legal : Custom);
1689 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1690 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1691 Subtarget.hasVLX() ? Legal : Custom);
1692 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1693 Subtarget.hasVLX() ? Legal : Custom);
1694 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1695 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1696 Subtarget.hasVLX() ? Legal : Custom);
1697 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1698 Subtarget.hasVLX() ? Legal : Custom);
1699 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1700 Subtarget.hasVLX() ? Legal : Custom);
1701 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1702 Subtarget.hasVLX() ? Legal : Custom);
1703
1704 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1705 setOperationAction(ISD::SMAX, VT, Legal);
1706 setOperationAction(ISD::UMAX, VT, Legal);
1707 setOperationAction(ISD::SMIN, VT, Legal);
1708 setOperationAction(ISD::UMIN, VT, Legal);
1709 setOperationAction(ISD::ABS, VT, Legal);
1710 }
1711
1712 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1713 setOperationAction(ISD::ROTL, VT, Custom);
1714 setOperationAction(ISD::ROTR, VT, Custom);
1715 }
1716
1717 // Custom legalize 2x32 to get a little better code.
1718 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1719 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1720
1721 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1722 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1723 setOperationAction(ISD::MSCATTER, VT, Custom);
1724
1725 if (Subtarget.hasDQI()) {
1726 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1727 setOperationAction(ISD::SINT_TO_FP, VT,
1728 Subtarget.hasVLX() ? Legal : Custom);
1729 setOperationAction(ISD::UINT_TO_FP, VT,
1730 Subtarget.hasVLX() ? Legal : Custom);
1731 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1732 Subtarget.hasVLX() ? Legal : Custom);
1733 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1734 Subtarget.hasVLX() ? Legal : Custom);
1735 setOperationAction(ISD::FP_TO_SINT, VT,
1736 Subtarget.hasVLX() ? Legal : Custom);
1737 setOperationAction(ISD::FP_TO_UINT, VT,
1738 Subtarget.hasVLX() ? Legal : Custom);
1739 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1740 Subtarget.hasVLX() ? Legal : Custom);
1741 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1742 Subtarget.hasVLX() ? Legal : Custom);
1743 setOperationAction(ISD::MUL, VT, Legal);
1744 }
1745 }
1746
1747 if (Subtarget.hasCDI()) {
1748 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1749 setOperationAction(ISD::CTLZ, VT, Legal);
1750 }
1751 } // Subtarget.hasCDI()
1752
1753 if (Subtarget.hasVPOPCNTDQ()) {
1754 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1755 setOperationAction(ISD::CTPOP, VT, Legal);
1756 }
1757 }
1758
1759 // This block controls legalization of v32i1/v64i1 which are available with
1760 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1761 // useBWIRegs.
1762 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1763 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1764 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1765
1766 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1767 setOperationAction(ISD::ADD, VT, Custom);
1768 setOperationAction(ISD::SUB, VT, Custom);
1769 setOperationAction(ISD::MUL, VT, Custom);
1770 setOperationAction(ISD::VSELECT, VT, Expand);
1771 setOperationAction(ISD::UADDSAT, VT, Custom);
1772 setOperationAction(ISD::SADDSAT, VT, Custom);
1773 setOperationAction(ISD::USUBSAT, VT, Custom);
1774 setOperationAction(ISD::SSUBSAT, VT, Custom);
1775
1776 setOperationAction(ISD::TRUNCATE, VT, Custom);
1777 setOperationAction(ISD::SETCC, VT, Custom);
1778 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1779 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1780 setOperationAction(ISD::SELECT, VT, Custom);
1781 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1782 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1783 }
1784
1785 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1786 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1787 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1788 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1789 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1790 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1791
1792 // Extends from v32i1 masks to 256-bit vectors.
1793 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1794 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1795 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1796 }
1797
1798 // This block controls legalization for v32i16 and v64i8. 512-bits can be
1799 // disabled based on prefer-vector-width and required-vector-width function
1800 // attributes.
1801 if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1802 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1803 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1804
1805 // Extends from v64i1 masks to 512-bit vectors.
1806 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1807 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1808 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1809
1810 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1811 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1812 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1813 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1814 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1815 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1816 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1817 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1818 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1819 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1820 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1821 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1822 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1823 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1824 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1825 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1826 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1827 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1828 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1829 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1830 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1831 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1832 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1833
1834 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1835 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1836
1837 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1838
1839 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1840 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1841 setOperationAction(ISD::VSELECT, VT, Custom);
1842 setOperationAction(ISD::ABS, VT, Legal);
1843 setOperationAction(ISD::SRL, VT, Custom);
1844 setOperationAction(ISD::SHL, VT, Custom);
1845 setOperationAction(ISD::SRA, VT, Custom);
1846 setOperationAction(ISD::MLOAD, VT, Legal);
1847 setOperationAction(ISD::MSTORE, VT, Legal);
1848 setOperationAction(ISD::CTPOP, VT, Custom);
1849 setOperationAction(ISD::CTLZ, VT, Custom);
1850 setOperationAction(ISD::SMAX, VT, Legal);
1851 setOperationAction(ISD::UMAX, VT, Legal);
1852 setOperationAction(ISD::SMIN, VT, Legal);
1853 setOperationAction(ISD::UMIN, VT, Legal);
1854 setOperationAction(ISD::SETCC, VT, Custom);
1855 setOperationAction(ISD::UADDSAT, VT, Legal);
1856 setOperationAction(ISD::SADDSAT, VT, Legal);
1857 setOperationAction(ISD::USUBSAT, VT, Legal);
1858 setOperationAction(ISD::SSUBSAT, VT, Legal);
1859 setOperationAction(ISD::SELECT, VT, Custom);
1860
1861 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1862 // setcc all the way to isel and prefer SETGT in some isel patterns.
1863 setCondCodeAction(ISD::SETLT, VT, Custom);
1864 setCondCodeAction(ISD::SETLE, VT, Custom);
1865 }
1866
1867 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1868 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1869 }
1870
1871 if (Subtarget.hasBITALG()) {
1872 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1873 setOperationAction(ISD::CTPOP, VT, Legal);
1874 }
1875
1876 if (Subtarget.hasVBMI2()) {
1877 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1878 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1879 }
1880 }
1881
1882 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1883 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1884 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1885 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1886 }
1887
1888 // These operations are handled on non-VLX by artificially widening in
1889 // isel patterns.
1890 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1891
1892 if (Subtarget.hasBITALG()) {
1893 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1894 setOperationAction(ISD::CTPOP, VT, Legal);
1895 }
1896 }
1897
1898 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1899 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1900 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1901 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1902 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1903 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1904
1905 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1906 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1907 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1908 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1909 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1910
1911 if (Subtarget.hasDQI()) {
1912 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1913 // v2f32 UINT_TO_FP is already custom under SSE2.
1914 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1916, __PRETTY_FUNCTION__))
1915 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1916, __PRETTY_FUNCTION__))
1916 "Unexpected operation action!")((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1916, __PRETTY_FUNCTION__))
;
1917 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1918 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1919 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1920 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1921 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1922 }
1923
1924 if (Subtarget.hasBWI()) {
1925 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1926 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1927 }
1928
1929 if (Subtarget.hasVBMI2()) {
1930 // TODO: Make these legal even without VLX?
1931 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1932 MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1933 setOperationAction(ISD::FSHL, VT, Custom);
1934 setOperationAction(ISD::FSHR, VT, Custom);
1935 }
1936 }
1937
1938 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
1939 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
1940 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1941 }
1942
1943 // We want to custom lower some of our intrinsics.
1944 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1945 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1946 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1947 if (!Subtarget.is64Bit()) {
1948 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1949 }
1950
1951 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1952 // handle type legalization for these operations here.
1953 //
1954 // FIXME: We really should do custom legalization for addition and
1955 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1956 // than generic legalization for 64-bit multiplication-with-overflow, though.
1957 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1958 if (VT == MVT::i64 && !Subtarget.is64Bit())
1959 continue;
1960 // Add/Sub/Mul with overflow operations are custom lowered.
1961 setOperationAction(ISD::SADDO, VT, Custom);
1962 setOperationAction(ISD::UADDO, VT, Custom);
1963 setOperationAction(ISD::SSUBO, VT, Custom);
1964 setOperationAction(ISD::USUBO, VT, Custom);
1965 setOperationAction(ISD::SMULO, VT, Custom);
1966 setOperationAction(ISD::UMULO, VT, Custom);
1967
1968 // Support carry in as value rather than glue.
1969 setOperationAction(ISD::ADDCARRY, VT, Custom);
1970 setOperationAction(ISD::SUBCARRY, VT, Custom);
1971 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1972 }
1973
1974 if (!Subtarget.is64Bit()) {
1975 // These libcalls are not available in 32-bit.
1976 setLibcallName(RTLIB::SHL_I128, nullptr);
1977 setLibcallName(RTLIB::SRL_I128, nullptr);
1978 setLibcallName(RTLIB::SRA_I128, nullptr);
1979 setLibcallName(RTLIB::MUL_I128, nullptr);
1980 }
1981
1982 // Combine sin / cos into _sincos_stret if it is available.
1983 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1984 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1985 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1986 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1987 }
1988
1989 if (Subtarget.isTargetWin64()) {
1990 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1991 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1992 setOperationAction(ISD::SREM, MVT::i128, Custom);
1993 setOperationAction(ISD::UREM, MVT::i128, Custom);
1994 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1995 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1996 }
1997
1998 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1999 // is. We should promote the value to 64-bits to solve this.
2000 // This is what the CRT headers do - `fmodf` is an inline header
2001 // function casting to f64 and calling `fmod`.
2002 if (Subtarget.is32Bit() &&
2003 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2004 for (ISD::NodeType Op :
2005 {ISD::FCEIL, ISD::STRICT_FCEIL,
2006 ISD::FCOS, ISD::STRICT_FCOS,
2007 ISD::FEXP, ISD::STRICT_FEXP,
2008 ISD::FFLOOR, ISD::STRICT_FFLOOR,
2009 ISD::FREM, ISD::STRICT_FREM,
2010 ISD::FLOG, ISD::STRICT_FLOG,
2011 ISD::FLOG10, ISD::STRICT_FLOG10,
2012 ISD::FPOW, ISD::STRICT_FPOW,
2013 ISD::FSIN, ISD::STRICT_FSIN})
2014 if (isOperationExpand(Op, MVT::f32))
2015 setOperationAction(Op, MVT::f32, Promote);
2016
2017 // We have target-specific dag combine patterns for the following nodes:
2018 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
2019 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
2020 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
2021 setTargetDAGCombine(ISD::CONCAT_VECTORS);
2022 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
2023 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
2024 setTargetDAGCombine(ISD::BITCAST);
2025 setTargetDAGCombine(ISD::VSELECT);
2026 setTargetDAGCombine(ISD::SELECT);
2027 setTargetDAGCombine(ISD::SHL);
2028 setTargetDAGCombine(ISD::SRA);
2029 setTargetDAGCombine(ISD::SRL);
2030 setTargetDAGCombine(ISD::OR);
2031 setTargetDAGCombine(ISD::AND);
2032 setTargetDAGCombine(ISD::ADD);
2033 setTargetDAGCombine(ISD::FADD);
2034 setTargetDAGCombine(ISD::FSUB);
2035 setTargetDAGCombine(ISD::FNEG);
2036 setTargetDAGCombine(ISD::FMA);
2037 setTargetDAGCombine(ISD::STRICT_FMA);
2038 setTargetDAGCombine(ISD::FMINNUM);
2039 setTargetDAGCombine(ISD::FMAXNUM);
2040 setTargetDAGCombine(ISD::SUB);
2041 setTargetDAGCombine(ISD::LOAD);
2042 setTargetDAGCombine(ISD::MLOAD);
2043 setTargetDAGCombine(ISD::STORE);
2044 setTargetDAGCombine(ISD::MSTORE);
2045 setTargetDAGCombine(ISD::TRUNCATE);
2046 setTargetDAGCombine(ISD::ZERO_EXTEND);
2047 setTargetDAGCombine(ISD::ANY_EXTEND);
2048 setTargetDAGCombine(ISD::SIGN_EXTEND);
2049 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
2050 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
2051 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
2052 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
2053 setTargetDAGCombine(ISD::SINT_TO_FP);
2054 setTargetDAGCombine(ISD::UINT_TO_FP);
2055 setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
2056 setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
2057 setTargetDAGCombine(ISD::SETCC);
2058 setTargetDAGCombine(ISD::MUL);
2059 setTargetDAGCombine(ISD::XOR);
2060 setTargetDAGCombine(ISD::MSCATTER);
2061 setTargetDAGCombine(ISD::MGATHER);
2062 setTargetDAGCombine(ISD::FP16_TO_FP);
2063 setTargetDAGCombine(ISD::FP_EXTEND);
2064 setTargetDAGCombine(ISD::FP_ROUND);
2065
2066 computeRegisterProperties(Subtarget.getRegisterInfo());
2067
2068 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2069 MaxStoresPerMemsetOptSize = 8;
2070 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2071 MaxStoresPerMemcpyOptSize = 4;
2072 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2073 MaxStoresPerMemmoveOptSize = 4;
2074
2075 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2076 // that needs to benchmarked and balanced with the potential use of vector
2077 // load/store types (PR33329, PR33914).
2078 MaxLoadsPerMemcmp = 2;
2079 MaxLoadsPerMemcmpOptSize = 2;
2080
2081 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
2082 setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
2083
2084 // An out-of-order CPU can speculatively execute past a predictable branch,
2085 // but a conditional move could be stalled by an expensive earlier operation.
2086 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2087 EnableExtLdPromotion = true;
2088 setPrefFunctionAlignment(Align(16));
2089
2090 verifyIntrinsicTables();
2091
2092 // Default to having -disable-strictnode-mutation on
2093 IsStrictFPEnabled = true;
2094}
2095
2096// This has so far only been implemented for 64-bit MachO.
2097bool X86TargetLowering::useLoadStackGuardNode() const {
2098 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2099}
2100
2101bool X86TargetLowering::useStackGuardXorFP() const {
2102 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2103 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2104}
2105
2106SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2107 const SDLoc &DL) const {
2108 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2109 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2110 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2111 return SDValue(Node, 0);
2112}
2113
2114TargetLoweringBase::LegalizeTypeAction
2115X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2116 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
2117 return TypeSplitVector;
2118
2119 if (VT.getVectorNumElements() != 1 &&
2120 VT.getVectorElementType() != MVT::i1)
2121 return TypeWidenVector;
2122
2123 return TargetLoweringBase::getPreferredVectorAction(VT);
2124}
2125
2126MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2127 CallingConv::ID CC,
2128 EVT VT) const {
2129 // v32i1 vectors should be promoted to v32i8 to match avx2.
2130 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
2131 return MVT::v32i8;
2132 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2133 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2134 Subtarget.hasAVX512() &&
2135 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2136 (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
2137 (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
2138 return MVT::i8;
2139 // Split v64i1 vectors if we don't have v64i8 available.
2140 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2141 CC != CallingConv::X86_RegCall)
2142 return MVT::v32i1;
2143 // FIXME: Should we just make these types legal and custom split operations?
2144 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
2145 Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
2146 return MVT::v16i32;
2147 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2148}
2149
2150unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2151 CallingConv::ID CC,
2152 EVT VT) const {
2153 // v32i1 vectors should be promoted to v32i8 to match avx2.
2154 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
2155 return 1;
2156 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2157 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2158 Subtarget.hasAVX512() &&
2159 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2160 (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
2161 (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
2162 return VT.getVectorNumElements();
2163 // Split v64i1 vectors if we don't have v64i8 available.
2164 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2165 CC != CallingConv::X86_RegCall)
2166 return 2;
2167 // FIXME: Should we just make these types legal and custom split operations?
2168 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
2169 Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
2170 return 1;
2171 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2172}
2173
2174unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2175 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2176 unsigned &NumIntermediates, MVT &RegisterVT) const {
2177 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2178 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2179 Subtarget.hasAVX512() &&
2180 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2181 (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
2182 (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
2183 RegisterVT = MVT::i8;
2184 IntermediateVT = MVT::i1;
2185 NumIntermediates = VT.getVectorNumElements();
2186 return NumIntermediates;
2187 }
2188
2189 // Split v64i1 vectors if we don't have v64i8 available.
2190 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2191 CC != CallingConv::X86_RegCall) {
2192 RegisterVT = MVT::v32i1;
2193 IntermediateVT = MVT::v32i1;
2194 NumIntermediates = 2;
2195 return 2;
2196 }
2197
2198 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2199 NumIntermediates, RegisterVT);
2200}
2201
2202EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2203 LLVMContext& Context,
2204 EVT VT) const {
2205 if (!VT.isVector())
2206 return MVT::i8;
2207
2208 if (Subtarget.hasAVX512()) {
2209 const unsigned NumElts = VT.getVectorNumElements();
2210
2211 // Figure out what this type will be legalized to.
2212 EVT LegalVT = VT;
2213 while (getTypeAction(Context, LegalVT) != TypeLegal)
2214 LegalVT = getTypeToTransformTo(Context, LegalVT);
2215
2216 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2217 if (LegalVT.getSimpleVT().is512BitVector())
2218 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2219
2220 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2221 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2222 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2223 // vXi16/vXi8.
2224 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2225 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2226 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2227 }
2228 }
2229
2230 return VT.changeVectorElementTypeToInteger();
2231}
2232
2233/// Helper for getByValTypeAlignment to determine
2234/// the desired ByVal argument alignment.
2235static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
2236 if (MaxAlign == 16)
2237 return;
2238 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2239 if (VTy->getBitWidth() == 128)
2240 MaxAlign = 16;
2241 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2242 unsigned EltAlign = 0;
2243 getMaxByValAlign(ATy->getElementType(), EltAlign);
2244 if (EltAlign > MaxAlign)
2245 MaxAlign = EltAlign;
2246 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2247 for (auto *EltTy : STy->elements()) {
2248 unsigned EltAlign = 0;
2249 getMaxByValAlign(EltTy, EltAlign);
2250 if (EltAlign > MaxAlign)
2251 MaxAlign = EltAlign;
2252 if (MaxAlign == 16)
2253 break;
2254 }
2255 }
2256}
2257
2258/// Return the desired alignment for ByVal aggregate
2259/// function arguments in the caller parameter area. For X86, aggregates
2260/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2261/// are at 4-byte boundaries.
2262unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2263 const DataLayout &DL) const {
2264 if (Subtarget.is64Bit()) {
2265 // Max of 8 and alignment of type.
2266 unsigned TyAlign = DL.getABITypeAlignment(Ty);
2267 if (TyAlign > 8)
2268 return TyAlign;
2269 return 8;
2270 }
2271
2272 unsigned Align = 4;
2273 if (Subtarget.hasSSE1())
2274 getMaxByValAlign(Ty, Align);
2275 return Align;
2276}
2277
2278/// It returns EVT::Other if the type should be determined using generic
2279/// target-independent logic.
2280/// For vector ops we check that the overall size isn't larger than our
2281/// preferred vector width.
2282EVT X86TargetLowering::getOptimalMemOpType(
2283 const MemOp &Op, const AttributeList &FuncAttributes) const {
2284 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2285 if (Op.size() >= 16 &&
2286 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2287 // FIXME: Check if unaligned 64-byte accesses are slow.
2288 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2289 (Subtarget.getPreferVectorWidth() >= 512)) {
2290 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2291 }
2292 // FIXME: Check if unaligned 32-byte accesses are slow.
2293 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2294 (Subtarget.getPreferVectorWidth() >= 256)) {
2295 // Although this isn't a well-supported type for AVX1, we'll let
2296 // legalization and shuffle lowering produce the optimal codegen. If we
2297 // choose an optimal type with a vector element larger than a byte,
2298 // getMemsetStores() may create an intermediate splat (using an integer
2299 // multiply) before we splat as a vector.
2300 return MVT::v32i8;
2301 }
2302 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2303 return MVT::v16i8;
2304 // TODO: Can SSE1 handle a byte vector?
2305 // If we have SSE1 registers we should be able to use them.
2306 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2307 (Subtarget.getPreferVectorWidth() >= 128))
2308 return MVT::v4f32;
2309 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2310 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2311 // Do not use f64 to lower memcpy if source is string constant. It's
2312 // better to use i32 to avoid the loads.
2313 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2314 // The gymnastics of splatting a byte value into an XMM register and then
2315 // only using 8-byte stores (because this is a CPU with slow unaligned
2316 // 16-byte accesses) makes that a loser.
2317 return MVT::f64;
2318 }
2319 }
2320 // This is a compromise. If we reach here, unaligned accesses may be slow on
2321 // this target. However, creating smaller, aligned accesses could be even
2322 // slower and would certainly be a lot more code.
2323 if (Subtarget.is64Bit() && Op.size() >= 8)
2324 return MVT::i64;
2325 return MVT::i32;
2326}
2327
2328bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2329 if (VT == MVT::f32)
2330 return X86ScalarSSEf32;
2331 else if (VT == MVT::f64)
2332 return X86ScalarSSEf64;
2333 return true;
2334}
2335
2336bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2337 EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags,
2338 bool *Fast) const {
2339 if (Fast) {
2340 switch (VT.getSizeInBits()) {
2341 default:
2342 // 8-byte and under are always assumed to be fast.
2343 *Fast = true;
2344 break;
2345 case 128:
2346 *Fast = !Subtarget.isUnalignedMem16Slow();
2347 break;
2348 case 256:
2349 *Fast = !Subtarget.isUnalignedMem32Slow();
2350 break;
2351 // TODO: What about AVX-512 (512-bit) accesses?
2352 }
2353 }
2354 // NonTemporal vector memory ops must be aligned.
2355 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2356 // NT loads can only be vector aligned, so if its less aligned than the
2357 // minimum vector size (which we can split the vector down to), we might as
2358 // well use a regular unaligned vector load.
2359 // We don't have any NT loads pre-SSE41.
2360 if (!!(Flags & MachineMemOperand::MOLoad))
2361 return (Align < 16 || !Subtarget.hasSSE41());
2362 return false;
2363 }
2364 // Misaligned accesses of any size are always allowed.
2365 return true;
2366}
2367
2368/// Return the entry encoding for a jump table in the
2369/// current function. The returned value is a member of the
2370/// MachineJumpTableInfo::JTEntryKind enum.
2371unsigned X86TargetLowering::getJumpTableEncoding() const {
2372 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2373 // symbol.
2374 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2375 return MachineJumpTableInfo::EK_Custom32;
2376
2377 // Otherwise, use the normal jump table encoding heuristics.
2378 return TargetLowering::getJumpTableEncoding();
2379}
2380
2381bool X86TargetLowering::useSoftFloat() const {
2382 return Subtarget.useSoftFloat();
2383}
2384
2385void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2386 ArgListTy &Args) const {
2387
2388 // Only relabel X86-32 for C / Stdcall CCs.
2389 if (Subtarget.is64Bit())
2390 return;
2391 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2392 return;
2393 unsigned ParamRegs = 0;
2394 if (auto *M = MF->getFunction().getParent())
2395 ParamRegs = M->getNumberRegisterParameters();
2396
2397 // Mark the first N int arguments as having reg
2398 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2399 Type *T = Args[Idx].Ty;
2400 if (T->isIntOrPtrTy())
2401 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2402 unsigned numRegs = 1;
2403 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2404 numRegs = 2;
2405 if (ParamRegs < numRegs)
2406 return;
2407 ParamRegs -= numRegs;
2408 Args[Idx].IsInReg = true;
2409 }
2410 }
2411}
2412
2413const MCExpr *
2414X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2415 const MachineBasicBlock *MBB,
2416 unsigned uid,MCContext &Ctx) const{
2417 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2417, __PRETTY_FUNCTION__))
;
2418 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2419 // entries.
2420 return MCSymbolRefExpr::create(MBB->getSymbol(),
2421 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2422}
2423
2424/// Returns relocation base for the given PIC jumptable.
2425SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2426 SelectionDAG &DAG) const {
2427 if (!Subtarget.is64Bit())
2428 // This doesn't have SDLoc associated with it, but is not really the
2429 // same as a Register.
2430 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2431 getPointerTy(DAG.getDataLayout()));
2432 return Table;
2433}
2434
2435/// This returns the relocation base for the given PIC jumptable,
2436/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2437const MCExpr *X86TargetLowering::
2438getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2439 MCContext &Ctx) const {
2440 // X86-64 uses RIP relative addressing based on the jump table label.
2441 if (Subtarget.isPICStyleRIPRel())
2442 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2443
2444 // Otherwise, the reference is relative to the PIC base.
2445 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2446}
2447
2448std::pair<const TargetRegisterClass *, uint8_t>
2449X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2450 MVT VT) const {
2451 const TargetRegisterClass *RRC = nullptr;
2452 uint8_t Cost = 1;
2453 switch (VT.SimpleTy) {
2454 default:
2455 return TargetLowering::findRepresentativeClass(TRI, VT);
2456 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2457 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2458 break;
2459 case MVT::x86mmx:
2460 RRC = &X86::VR64RegClass;
2461 break;
2462 case MVT::f32: case MVT::f64:
2463 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2464 case MVT::v4f32: case MVT::v2f64:
2465 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2466 case MVT::v8f32: case MVT::v4f64:
2467 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2468 case MVT::v16f32: case MVT::v8f64:
2469 RRC = &X86::VR128XRegClass;
2470 break;
2471 }
2472 return std::make_pair(RRC, Cost);
2473}
2474
2475unsigned X86TargetLowering::getAddressSpace() const {
2476 if (Subtarget.is64Bit())
2477 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2478 return 256;
2479}
2480
2481static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2482 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2483 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2484}
2485
2486static Constant* SegmentOffset(IRBuilder<> &IRB,
2487 unsigned Offset, unsigned AddressSpace) {
2488 return ConstantExpr::getIntToPtr(
2489 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2490 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2491}
2492
2493Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2494 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2495 // tcbhead_t; use it instead of the usual global variable (see
2496 // sysdeps/{i386,x86_64}/nptl/tls.h)
2497 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2498 if (Subtarget.isTargetFuchsia()) {
2499 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2500 return SegmentOffset(IRB, 0x10, getAddressSpace());
2501 } else {
2502 // %fs:0x28, unless we're using a Kernel code model, in which case
2503 // it's %gs:0x28. gs:0x14 on i386.
2504 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2505 return SegmentOffset(IRB, Offset, getAddressSpace());
2506 }
2507 }
2508
2509 return TargetLowering::getIRStackGuard(IRB);
2510}
2511
2512void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2513 // MSVC CRT provides functionalities for stack protection.
2514 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2515 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2516 // MSVC CRT has a global variable holding security cookie.
2517 M.getOrInsertGlobal("__security_cookie",
2518 Type::getInt8PtrTy(M.getContext()));
2519
2520 // MSVC CRT has a function to validate security cookie.
2521 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2522 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2523 Type::getInt8PtrTy(M.getContext()));
2524 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2525 F->setCallingConv(CallingConv::X86_FastCall);
2526 F->addAttribute(1, Attribute::AttrKind::InReg);
2527 }
2528 return;
2529 }
2530 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2531 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2532 return;
2533 TargetLowering::insertSSPDeclarations(M);
2534}
2535
2536Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2537 // MSVC CRT has a global variable holding security cookie.
2538 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2539 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2540 return M.getGlobalVariable("__security_cookie");
2541 }
2542 return TargetLowering::getSDagStackGuard(M);
2543}
2544
2545Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2546 // MSVC CRT has a function to validate security cookie.
2547 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2548 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2549 return M.getFunction("__security_check_cookie");
2550 }
2551 return TargetLowering::getSSPStackGuardCheck(M);
2552}
2553
2554Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2555 if (Subtarget.getTargetTriple().isOSContiki())
2556 return getDefaultSafeStackPointerLocation(IRB, false);
2557
2558 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2559 // definition of TLS_SLOT_SAFESTACK in
2560 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2561 if (Subtarget.isTargetAndroid()) {
2562 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2563 // %gs:0x24 on i386
2564 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2565 return SegmentOffset(IRB, Offset, getAddressSpace());
2566 }
2567
2568 // Fuchsia is similar.
2569 if (Subtarget.isTargetFuchsia()) {
2570 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2571 return SegmentOffset(IRB, 0x18, getAddressSpace());
2572 }
2573
2574 return TargetLowering::getSafeStackPointerLocation(IRB);
2575}
2576
2577bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2578 unsigned DestAS) const {
2579 assert(SrcAS != DestAS && "Expected different address spaces!")((SrcAS != DestAS && "Expected different address spaces!"
) ? static_cast<void> (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2579, __PRETTY_FUNCTION__))
;
2580
2581 const TargetMachine &TM = getTargetMachine();
2582 if (TM.getPointerSize(SrcAS) != TM.getPointerSize(DestAS))
2583 return false;
2584
2585 return SrcAS < 256 && DestAS < 256;
2586}
2587
2588//===----------------------------------------------------------------------===//
2589// Return Value Calling Convention Implementation
2590//===----------------------------------------------------------------------===//
2591
2592bool X86TargetLowering::CanLowerReturn(
2593 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2594 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2595 SmallVector<CCValAssign, 16> RVLocs;
2596 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2597 return CCInfo.CheckReturn(Outs, RetCC_X86);
2598}
2599
2600const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2601 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2602 return ScratchRegs;
2603}
2604
2605/// Lowers masks values (v*i1) to the local register values
2606/// \returns DAG node after lowering to register type
2607static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2608 const SDLoc &Dl, SelectionDAG &DAG) {
2609 EVT ValVT = ValArg.getValueType();
2610
2611 if (ValVT == MVT::v1i1)
2612 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2613 DAG.getIntPtrConstant(0, Dl));
2614
2615 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2616 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2617 // Two stage lowering might be required
2618 // bitcast: v8i1 -> i8 / v16i1 -> i16
2619 // anyextend: i8 -> i32 / i16 -> i32
2620 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2621 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2622 if (ValLoc == MVT::i32)
2623 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2624 return ValToCopy;
2625 }
2626
2627 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2628 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2629 // One stage lowering is required
2630 // bitcast: v32i1 -> i32 / v64i1 -> i64
2631 return DAG.getBitcast(ValLoc, ValArg);
2632 }
2633
2634 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2635}
2636
2637/// Breaks v64i1 value into two registers and adds the new node to the DAG
2638static void Passv64i1ArgInRegs(
2639 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2640 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, CCValAssign &VA,
2641 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2642 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")((Subtarget.hasBWI() && "Expected AVX512BW target!") ?
static_cast<void> (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2642, __PRETTY_FUNCTION__))
;
2643 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2643, __PRETTY_FUNCTION__))
;
2644 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2644, __PRETTY_FUNCTION__))
;
2645 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2646, __PRETTY_FUNCTION__))
2646 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2646, __PRETTY_FUNCTION__))
;
2647
2648 // Before splitting the value we cast it to i64
2649 Arg = DAG.getBitcast(MVT::i64, Arg);
2650
2651 // Splitting the value into two i32 types
2652 SDValue Lo, Hi;
2653 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2654 DAG.getConstant(0, Dl, MVT::i32));
2655 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2656 DAG.getConstant(1, Dl, MVT::i32));
2657
2658 // Attach the two i32 types into corresponding registers
2659 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2660 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2661}
2662
2663SDValue
2664X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2665 bool isVarArg,
2666 const SmallVectorImpl<ISD::OutputArg> &Outs,
2667 const SmallVectorImpl<SDValue> &OutVals,
2668 const SDLoc &dl, SelectionDAG &DAG) const {
2669 MachineFunction &MF = DAG.getMachineFunction();
2670 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2671
2672 // In some cases we need to disable registers from the default CSR list.
2673 // For example, when they are used for argument passing.
2674 bool ShouldDisableCalleeSavedRegister =
2675 CallConv == CallingConv::X86_RegCall ||
2676 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2677
2678 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2679 report_fatal_error("X86 interrupts may not return any value");
2680
2681 SmallVector<CCValAssign, 16> RVLocs;
2682 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2683 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2684
2685 SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
2686 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2687 ++I, ++OutsIndex) {
2688 CCValAssign &VA = RVLocs[I];
2689 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2689, __PRETTY_FUNCTION__))
;
2690
2691 // Add the register to the CalleeSaveDisableRegs list.
2692 if (ShouldDisableCalleeSavedRegister)
2693 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2694
2695 SDValue ValToCopy = OutVals[OutsIndex];
2696 EVT ValVT = ValToCopy.getValueType();
2697
2698 // Promote values to the appropriate types.
2699 if (VA.getLocInfo() == CCValAssign::SExt)
2700 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2701 else if (VA.getLocInfo() == CCValAssign::ZExt)
2702 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2703 else if (VA.getLocInfo() == CCValAssign::AExt) {
2704 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2705 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2706 else
2707 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2708 }
2709 else if (VA.getLocInfo() == CCValAssign::BCvt)
2710 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2711
2712 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2713, __PRETTY_FUNCTION__))
2713 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2713, __PRETTY_FUNCTION__))
;
2714
2715 // Report an error if we have attempted to return a value via an XMM
2716 // register and SSE was disabled.
2717 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
2718 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2719 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2720 } else if (!Subtarget.hasSSE2() &&
2721 X86::FR64XRegClass.contains(VA.getLocReg()) &&
2722 ValVT == MVT::f64) {
2723 // When returning a double via an XMM register, report an error if SSE2 is
2724 // not enabled.
2725 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2726 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2727 }
2728
2729 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2730 // the RET instruction and handled by the FP Stackifier.
2731 if (VA.getLocReg() == X86::FP0 ||
2732 VA.getLocReg() == X86::FP1) {
2733 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2734 // change the value to the FP stack register class.
2735 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2736 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2737 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2738 // Don't emit a copytoreg.
2739 continue;
2740 }
2741
2742 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2743 // which is returned in RAX / RDX.
2744 if (Subtarget.is64Bit()) {
2745 if (ValVT == MVT::x86mmx) {
2746 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2747 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2748 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2749 ValToCopy);
2750 // If we don't have SSE2 available, convert to v4f32 so the generated
2751 // register is legal.
2752 if (!Subtarget.hasSSE2())
2753 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2754 }
2755 }
2756 }
2757
2758 if (VA.needsCustom()) {
2759 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2760, __PRETTY_FUNCTION__))
2760 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2760, __PRETTY_FUNCTION__))
;
2761
2762 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
2763 Subtarget);
2764
2765 // Add the second register to the CalleeSaveDisableRegs list.
2766 if (ShouldDisableCalleeSavedRegister)
2767 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2768 } else {
2769 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2770 }
2771 }
2772
2773 SDValue Flag;
2774 SmallVector<SDValue, 6> RetOps;
2775 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2776 // Operand #1 = Bytes To Pop
2777 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2778 MVT::i32));
2779
2780 // Copy the result values into the output registers.
2781 for (auto &RetVal : RetVals) {
2782 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
2783 RetOps.push_back(RetVal.second);
2784 continue; // Don't emit a copytoreg.
2785 }
2786
2787 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
2788 Flag = Chain.getValue(1);
2789 RetOps.push_back(
2790 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
2791 }
2792
2793 // Swift calling convention does not require we copy the sret argument
2794 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2795
2796 // All x86 ABIs require that for returning structs by value we copy
2797 // the sret argument into %rax/%eax (depending on ABI) for the return.
2798 // We saved the argument into a virtual register in the entry block,
2799 // so now we copy the value out and into %rax/%eax.
2800 //
2801 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2802 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2803 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2804 // either case FuncInfo->setSRetReturnReg() will have been called.
2805 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2806 // When we have both sret and another return value, we should use the
2807 // original Chain stored in RetOps[0], instead of the current Chain updated
2808 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2809
2810 // For the case of sret and another return value, we have
2811 // Chain_0 at the function entry
2812 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2813 // If we use Chain_1 in getCopyFromReg, we will have
2814 // Val = getCopyFromReg(Chain_1)
2815 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2816
2817 // getCopyToReg(Chain_0) will be glued together with
2818 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2819 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2820 // Data dependency from Unit B to Unit A due to usage of Val in
2821 // getCopyToReg(Chain_1, Val)
2822 // Chain dependency from Unit A to Unit B
2823
2824 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2825 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2826 getPointerTy(MF.getDataLayout()));
2827
2828 unsigned RetValReg
2829 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2830 X86::RAX : X86::EAX;
2831 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2832 Flag = Chain.getValue(1);
2833
2834 // RAX/EAX now acts like a return value.
2835 RetOps.push_back(
2836 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2837
2838 // Add the returned register to the CalleeSaveDisableRegs list.
2839 if (ShouldDisableCalleeSavedRegister)
2840 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2841 }
2842
2843 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2844 const MCPhysReg *I =
2845 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2846 if (I) {
2847 for (; *I; ++I) {
2848 if (X86::GR64RegClass.contains(*I))
2849 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2850 else
2851 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2851)
;
2852 }
2853 }
2854
2855 RetOps[0] = Chain; // Update chain.
2856
2857 // Add the flag if we have it.
2858 if (Flag.getNode())
2859 RetOps.push_back(Flag);
2860
2861 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2862 if (CallConv == CallingConv::X86_INTR)
2863 opcode = X86ISD::IRET;
2864 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2865}
2866
2867bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2868 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2869 return false;
2870
2871 SDValue TCChain = Chain;
2872 SDNode *Copy = *N->use_begin();
2873 if (Copy->getOpcode() == ISD::CopyToReg) {
2874 // If the copy has a glue operand, we conservatively assume it isn't safe to
2875 // perform a tail call.
2876 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2877 return false;
2878 TCChain = Copy->getOperand(0);
2879 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2880 return false;
2881
2882 bool HasRet = false;
2883 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2884 UI != UE; ++UI) {
2885 if (UI->getOpcode() != X86ISD::RET_FLAG)
2886 return false;
2887 // If we are returning more than one value, we can definitely
2888 // not make a tail call see PR19530
2889 if (UI->getNumOperands() > 4)
2890 return false;
2891 if (UI->getNumOperands() == 4 &&
2892 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2893 return false;
2894 HasRet = true;
2895 }
2896
2897 if (!HasRet)
2898 return false;
2899
2900 Chain = TCChain;
2901 return true;
2902}
2903
2904EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2905 ISD::NodeType ExtendKind) const {
2906 MVT ReturnMVT = MVT::i32;
2907
2908 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2909 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2910 // The ABI does not require i1, i8 or i16 to be extended.
2911 //
2912 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2913 // always extending i8/i16 return values, so keep doing that for now.
2914 // (PR26665).
2915 ReturnMVT = MVT::i8;
2916 }
2917
2918 EVT MinVT = getRegisterType(Context, ReturnMVT);
2919 return VT.bitsLT(MinVT) ? MinVT : VT;
2920}
2921
2922/// Reads two 32 bit registers and creates a 64 bit mask value.
2923/// \param VA The current 32 bit value that need to be assigned.
2924/// \param NextVA The next 32 bit value that need to be assigned.
2925/// \param Root The parent DAG node.
2926/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2927/// glue purposes. In the case the DAG is already using
2928/// physical register instead of virtual, we should glue
2929/// our new SDValue to InFlag SDvalue.
2930/// \return a new SDvalue of size 64bit.
2931static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2932 SDValue &Root, SelectionDAG &DAG,
2933 const SDLoc &Dl, const X86Subtarget &Subtarget,
2934 SDValue *InFlag = nullptr) {
2935 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2935, __PRETTY_FUNCTION__))
;
2936 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2936, __PRETTY_FUNCTION__))
;
2937 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2938, __PRETTY_FUNCTION__))
2938 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2938, __PRETTY_FUNCTION__))
;
2939 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2940, __PRETTY_FUNCTION__))
2940 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2940, __PRETTY_FUNCTION__))
;
2941 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2942, __PRETTY_FUNCTION__))
2942 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2942, __PRETTY_FUNCTION__))
;
2943
2944 SDValue Lo, Hi;
2945 SDValue ArgValueLo, ArgValueHi;
2946
2947 MachineFunction &MF = DAG.getMachineFunction();
2948 const TargetRegisterClass *RC = &X86::GR32RegClass;
2949
2950 // Read a 32 bit value from the registers.
2951 if (nullptr == InFlag) {
2952 // When no physical register is present,
2953 // create an intermediate virtual register.
2954 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2955 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2956 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2957 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2958 } else {
2959 // When a physical register is available read the value from it and glue
2960 // the reads together.
2961 ArgValueLo =
2962 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2963 *InFlag = ArgValueLo.getValue(2);
2964 ArgValueHi =
2965 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2966 *InFlag = ArgValueHi.getValue(2);
2967 }
2968
2969 // Convert the i32 type into v32i1 type.
2970 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2971
2972 // Convert the i32 type into v32i1 type.
2973 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2974
2975 // Concatenate the two values together.
2976 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2977}
2978
2979/// The function will lower a register of various sizes (8/16/32/64)
2980/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2981/// \returns a DAG node contains the operand after lowering to mask type.
2982static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2983 const EVT &ValLoc, const SDLoc &Dl,
2984 SelectionDAG &DAG) {
2985 SDValue ValReturned = ValArg;
2986
2987 if (ValVT == MVT::v1i1)
2988 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2989
2990 if (ValVT == MVT::v64i1) {
2991 // In 32 bit machine, this case is handled by getv64i1Argument
2992 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2992, __PRETTY_FUNCTION__))
;
2993 // In 64 bit machine, There is no need to truncate the value only bitcast
2994 } else {
2995 MVT maskLen;
2996 switch (ValVT.getSimpleVT().SimpleTy) {
2997 case MVT::v8i1:
2998 maskLen = MVT::i8;
2999 break;
3000 case MVT::v16i1:
3001 maskLen = MVT::i16;
3002 break;
3003 case MVT::v32i1:
3004 maskLen = MVT::i32;
3005 break;
3006 default:
3007 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3007)
;
3008 }
3009
3010 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3011 }
3012 return DAG.getBitcast(ValVT, ValReturned);
3013}
3014
3015/// Lower the result values of a call into the
3016/// appropriate copies out of appropriate physical registers.
3017///
3018SDValue X86TargetLowering::LowerCallResult(
3019 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3020 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3021 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3022 uint32_t *RegMask) const {
3023
3024 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3025 // Assign locations to each value returned by this call.
3026 SmallVector<CCValAssign, 16> RVLocs;
3027 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3028 *DAG.getContext());
3029 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3030
3031 // Copy all of the result registers out of their specified physreg.
3032 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3033 ++I, ++InsIndex) {
3034 CCValAssign &VA = RVLocs[I];
3035 EVT CopyVT = VA.getLocVT();
3036
3037 // In some calling conventions we need to remove the used registers
3038 // from the register mask.
3039 if (RegMask) {
3040 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3041 SubRegs.isValid(); ++SubRegs)
3042 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3043 }
3044
3045 // Report an error if there was an attempt to return FP values via XMM
3046 // registers.
3047 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3048 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3049 if (VA.getLocReg() == X86::XMM1)
3050 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3051 else
3052 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3053 } else if (!Subtarget.hasSSE2() &&
3054 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3055 CopyVT == MVT::f64) {
3056 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3057 if (VA.getLocReg() == X86::XMM1)
3058 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3059 else
3060 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3061 }
3062
3063 // If we prefer to use the value in xmm registers, copy it out as f80 and
3064 // use a truncate to move it from fp stack reg to xmm reg.
3065 bool RoundAfterCopy = false;
3066 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3067 isScalarFPTypeInSSEReg(VA.getValVT())) {
3068 if (!Subtarget.hasX87())
3069 report_fatal_error("X87 register return with X87 disabled");
3070 CopyVT = MVT::f80;
3071 RoundAfterCopy = (CopyVT != VA.getLocVT());
3072 }
3073
3074 SDValue Val;
3075 if (VA.needsCustom()) {
3076 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3077, __PRETTY_FUNCTION__))
3077 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3077, __PRETTY_FUNCTION__))
;
3078 Val =
3079 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3080 } else {
3081 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3082 .getValue(1);
3083 Val = Chain.getValue(0);
3084 InFlag = Chain.getValue(2);
3085 }
3086
3087 if (RoundAfterCopy)
3088 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3089 // This truncation won't change the value.
3090 DAG.getIntPtrConstant(1, dl));
3091
3092 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
3093 if (VA.getValVT().isVector() &&
3094 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3095 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3096 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3097 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3098 } else
3099 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3100 }
3101
3102 if (VA.getLocInfo() == CCValAssign::BCvt)
3103 Val = DAG.getBitcast(VA.getValVT(), Val);
3104
3105 InVals.push_back(Val);
3106 }
3107
3108 return Chain;
3109}
3110
3111//===----------------------------------------------------------------------===//
3112// C & StdCall & Fast Calling Convention implementation
3113//===----------------------------------------------------------------------===//
3114// StdCall calling convention seems to be standard for many Windows' API
3115// routines and around. It differs from C calling convention just a little:
3116// callee should clean up the stack, not caller. Symbols should be also
3117// decorated in some fancy way :) It doesn't support any vector arguments.
3118// For info on fast calling convention see Fast Calling Convention (tail call)
3119// implementation LowerX86_32FastCCCallTo.
3120
3121/// CallIsStructReturn - Determines whether a call uses struct return
3122/// semantics.
3123enum StructReturnType {
3124 NotStructReturn,
3125 RegStructReturn,
3126 StackStructReturn
3127};
3128static StructReturnType
3129callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
3130 if (Outs.empty())
3131 return NotStructReturn;
3132
3133 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
3134 if (!Flags.isSRet())
3135 return NotStructReturn;
3136 if (Flags.isInReg() || IsMCU)
3137 return RegStructReturn;
3138 return StackStructReturn;
3139}
3140
3141/// Determines whether a function uses struct return semantics.
3142static StructReturnType
3143argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
3144 if (Ins.empty())
3145 return NotStructReturn;
3146
3147 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
3148 if (!Flags.isSRet())
3149 return NotStructReturn;
3150 if (Flags.isInReg() || IsMCU)
3151 return RegStructReturn;
3152 return StackStructReturn;
3153}
3154
3155/// Make a copy of an aggregate at address specified by "Src" to address
3156/// "Dst" with size and alignment information specified by the specific
3157/// parameter attribute. The copy will be passed as a byval function parameter.
3158static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3159 SDValue Chain, ISD::ArgFlagsTy Flags,
3160 SelectionDAG &DAG, const SDLoc &dl) {
3161 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
3162
3163 return DAG.getMemcpy(
3164 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3165 /*isVolatile*/ false, /*AlwaysInline=*/true,
3166 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3167}
3168
3169/// Return true if the calling convention is one that we can guarantee TCO for.
3170static bool canGuaranteeTCO(CallingConv::ID CC) {
3171 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3172 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3173 CC == CallingConv::HHVM || CC == CallingConv::Tail);
3174}
3175
3176/// Return true if we might ever do TCO for calls with this calling convention.
3177static bool mayTailCallThisCC(CallingConv::ID CC) {
3178 switch (CC) {
3179 // C calling conventions:
3180 case CallingConv::C:
3181 case CallingConv::Win64:
3182 case CallingConv::X86_64_SysV:
3183 // Callee pop conventions:
3184 case CallingConv::X86_ThisCall:
3185 case CallingConv::X86_StdCall:
3186 case CallingConv::X86_VectorCall:
3187 case CallingConv::X86_FastCall:
3188 // Swift:
3189 case CallingConv::Swift:
3190 return true;
3191 default:
3192 return canGuaranteeTCO(CC);
3193 }
3194}
3195
3196/// Return true if the function is being made into a tailcall target by
3197/// changing its ABI.
3198static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3199 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
3200}
3201
3202bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3203 if (!CI->isTailCall())
3204 return false;
3205
3206 ImmutableCallSite CS(CI);
3207 CallingConv::ID CalleeCC = CS.getCallingConv();
3208 if (!mayTailCallThisCC(CalleeCC))
3209 return false;
3210
3211 return true;
3212}
3213
3214SDValue
3215X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3216 const SmallVectorImpl<ISD::InputArg> &Ins,
3217 const SDLoc &dl, SelectionDAG &DAG,
3218 const CCValAssign &VA,
3219 MachineFrameInfo &MFI, unsigned i) const {
3220 // Create the nodes corresponding to a load from this parameter slot.
3221 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3222 bool AlwaysUseMutable = shouldGuaranteeTCO(
3223 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3224 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3225 EVT ValVT;
3226 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3227
3228 // If value is passed by pointer we have address passed instead of the value
3229 // itself. No need to extend if the mask value and location share the same
3230 // absolute size.
3231 bool ExtendedInMem =
3232 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3233 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3234
3235 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3236 ValVT = VA.getLocVT();
3237 else
3238 ValVT = VA.getValVT();
3239
3240 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3241 // changed with more analysis.
3242 // In case of tail call optimization mark all arguments mutable. Since they
3243 // could be overwritten by lowering of arguments in case of a tail call.
3244 if (Flags.isByVal()) {
3245 unsigned Bytes = Flags.getByValSize();
3246 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3247
3248 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3249 // can be improved with deeper analysis.
3250 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3251 /*isAliased=*/true);
3252 return DAG.getFrameIndex(FI, PtrVT);
3253 }
3254
3255 // This is an argument in memory. We might be able to perform copy elision.
3256 // If the argument is passed directly in memory without any extension, then we
3257 // can perform copy elision. Large vector types, for example, may be passed
3258 // indirectly by pointer.
3259 if (Flags.isCopyElisionCandidate() &&
3260 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
3261 EVT ArgVT = Ins[i].ArgVT;
3262 SDValue PartAddr;
3263 if (Ins[i].PartOffset == 0) {
3264 // If this is a one-part value or the first part of a multi-part value,
3265 // create a stack object for the entire argument value type and return a
3266 // load from our portion of it. This assumes that if the first part of an
3267 // argument is in memory, the rest will also be in memory.
3268 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3269 /*IsImmutable=*/false);
3270 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3271 return DAG.getLoad(
3272 ValVT, dl, Chain, PartAddr,
3273 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3274 } else {
3275 // This is not the first piece of an argument in memory. See if there is
3276 // already a fixed stack object including this offset. If so, assume it
3277 // was created by the PartOffset == 0 branch above and create a load from
3278 // the appropriate offset into it.
3279 int64_t PartBegin = VA.getLocMemOffset();
3280 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3281 int FI = MFI.getObjectIndexBegin();
3282 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3283 int64_t ObjBegin = MFI.getObjectOffset(FI);
3284 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3285 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3286 break;
3287 }
3288 if (MFI.isFixedObjectIndex(FI)) {
3289 SDValue Addr =
3290 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3291 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3292 return DAG.getLoad(
3293 ValVT, dl, Chain, Addr,
3294 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3295 Ins[i].PartOffset));
3296 }
3297 }
3298 }
3299
3300 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3301 VA.getLocMemOffset(), isImmutable);
3302
3303 // Set SExt or ZExt flag.
3304 if (VA.getLocInfo() == CCValAssign::ZExt) {
3305 MFI.setObjectZExt(FI, true);
3306 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3307 MFI.setObjectSExt(FI, true);
3308 }
3309
3310 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3311 SDValue Val = DAG.getLoad(
3312 ValVT, dl, Chain, FIN,
3313 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3314 return ExtendedInMem
3315 ? (VA.getValVT().isVector()
3316 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3317 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3318 : Val;
3319}
3320
3321// FIXME: Get this from tablegen.
3322static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3323 const X86Subtarget &Subtarget) {
3324 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3324, __PRETTY_FUNCTION__))
;
3325
3326 if (Subtarget.isCallingConvWin64(CallConv)) {
3327 static const MCPhysReg GPR64ArgRegsWin64[] = {
3328 X86::RCX, X86::RDX, X86::R8, X86::R9
3329 };
3330 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3331 }
3332
3333 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3334 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3335 };
3336 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3337}
3338
3339// FIXME: Get this from tablegen.
3340static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3341 CallingConv::ID CallConv,
3342 const X86Subtarget &Subtarget) {
3343 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3343, __PRETTY_FUNCTION__))
;
3344 if (Subtarget.isCallingConvWin64(CallConv)) {
3345 // The XMM registers which might contain var arg parameters are shadowed
3346 // in their paired GPR. So we only need to save the GPR to their home
3347 // slots.
3348 // TODO: __vectorcall will change this.
3349 return None;
3350 }
3351
3352 const Function &F = MF.getFunction();
3353 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3354 bool isSoftFloat = Subtarget.useSoftFloat();
3355 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3356, __PRETTY_FUNCTION__))
3356 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3356, __PRETTY_FUNCTION__))
;
3357 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3358 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3359 // registers.
3360 return None;
3361
3362 static const MCPhysReg XMMArgRegs64Bit[] = {
3363 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3364 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3365 };
3366 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3367}
3368
3369#ifndef NDEBUG
3370static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3371 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
3372 [](const CCValAssign &A, const CCValAssign &B) -> bool {
3373 return A.getValNo() < B.getValNo();
3374 });
3375}
3376#endif
3377
3378SDValue X86TargetLowering::LowerFormalArguments(
3379 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3380 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3381 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3382 MachineFunction &MF = DAG.getMachineFunction();
3383 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3384 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3385
3386 const Function &F = MF.getFunction();
3387 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3388 F.getName() == "main")
3389 FuncInfo->setForceFramePointer(true);
3390
3391 MachineFrameInfo &MFI = MF.getFrameInfo();
3392 bool Is64Bit = Subtarget.is64Bit();
3393 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3394
3395 assert(((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3397, __PRETTY_FUNCTION__))
3396 !(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3397, __PRETTY_FUNCTION__))
3397 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3397, __PRETTY_FUNCTION__))
;
3398
3399 // Assign locations to all of the incoming arguments.
3400 SmallVector<CCValAssign, 16> ArgLocs;
3401 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3402
3403 // Allocate shadow area for Win64.
3404 if (IsWin64)
3405 CCInfo.AllocateStack(32, 8);
3406
3407 CCInfo.AnalyzeArguments(Ins, CC_X86);
3408
3409 // In vectorcall calling convention a second pass is required for the HVA
3410 // types.
3411 if (CallingConv::X86_VectorCall == CallConv) {
3412 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3413 }
3414
3415 // The next loop assumes that the locations are in the same order of the
3416 // input arguments.
3417 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3418, __PRETTY_FUNCTION__))
3418 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3418, __PRETTY_FUNCTION__))
;
3419
3420 SDValue ArgValue;
3421 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3422 ++I, ++InsIndex) {
3423 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3423, __PRETTY_FUNCTION__))
;
3424 CCValAssign &VA = ArgLocs[I];
3425
3426 if (VA.isRegLoc()) {
3427 EVT RegVT = VA.getLocVT();
3428 if (VA.needsCustom()) {
3429 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3431, __PRETTY_FUNCTION__))
3430 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3431, __PRETTY_FUNCTION__))
3431 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3431, __PRETTY_FUNCTION__))
;
3432
3433 // v64i1 values, in regcall calling convention, that are
3434 // compiled to 32 bit arch, are split up into two registers.
3435 ArgValue =
3436 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3437 } else {
3438 const TargetRegisterClass *RC;
3439 if (RegVT == MVT::i8)
3440 RC = &X86::GR8RegClass;
3441 else if (RegVT == MVT::i16)
3442 RC = &X86::GR16RegClass;
3443 else if (RegVT == MVT::i32)
3444 RC = &X86::GR32RegClass;
3445 else if (Is64Bit && RegVT == MVT::i64)
3446 RC = &X86::GR64RegClass;
3447 else if (RegVT == MVT::f32)
3448 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3449 else if (RegVT == MVT::f64)
3450 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3451 else if (RegVT == MVT::f80)
3452 RC = &X86::RFP80RegClass;
3453 else if (RegVT == MVT::f128)
3454 RC = &X86::VR128RegClass;
3455 else if (RegVT.is512BitVector())
3456 RC = &X86::VR512RegClass;
3457 else if (RegVT.is256BitVector())
3458 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3459 else if (RegVT.is128BitVector())
3460 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3461 else if (RegVT == MVT::x86mmx)
3462 RC = &X86::VR64RegClass;
3463 else if (RegVT == MVT::v1i1)
3464 RC = &X86::VK1RegClass;
3465 else if (RegVT == MVT::v8i1)
3466 RC = &X86::VK8RegClass;
3467 else if (RegVT == MVT::v16i1)
3468 RC = &X86::VK16RegClass;
3469 else if (RegVT == MVT::v32i1)
3470 RC = &X86::VK32RegClass;
3471 else if (RegVT == MVT::v64i1)
3472 RC = &X86::VK64RegClass;
3473 else
3474 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3474)
;
3475
3476 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3477 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3478 }
3479
3480 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3481 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3482 // right size.
3483 if (VA.getLocInfo() == CCValAssign::SExt)
3484 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3485 DAG.getValueType(VA.getValVT()));
3486 else if (VA.getLocInfo() == CCValAssign::ZExt)
3487 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3488 DAG.getValueType(VA.getValVT()));
3489 else if (VA.getLocInfo() == CCValAssign::BCvt)
3490 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3491
3492 if (VA.isExtInLoc()) {
3493 // Handle MMX values passed in XMM regs.
3494 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3495 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3496 else if (VA.getValVT().isVector() &&
3497 VA.getValVT().getScalarType() == MVT::i1 &&
3498 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3499 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3500 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3501 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3502 } else
3503 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3504 }
3505 } else {
3506 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3506, __PRETTY_FUNCTION__))
;
3507 ArgValue =
3508 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3509 }
3510
3511 // If value is passed via pointer - do a load.
3512 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3513 ArgValue =
3514 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3515
3516 InVals.push_back(ArgValue);
3517 }
3518
3519 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3520 // Swift calling convention does not require we copy the sret argument
3521 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3522 if (CallConv == CallingConv::Swift)
3523 continue;
3524
3525 // All x86 ABIs require that for returning structs by value we copy the
3526 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3527 // the argument into a virtual register so that we can access it from the
3528 // return points.
3529 if (Ins[I].Flags.isSRet()) {
3530 unsigned Reg = FuncInfo->getSRetReturnReg();
3531 if (!Reg) {
3532 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3533 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3534 FuncInfo->setSRetReturnReg(Reg);
3535 }
3536 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3537 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3538 break;
3539 }
3540 }
3541
3542 unsigned StackSize = CCInfo.getNextStackOffset();
3543 // Align stack specially for tail calls.
3544 if (shouldGuaranteeTCO(CallConv,
3545 MF.getTarget().Options.GuaranteedTailCallOpt))
3546 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3547
3548 // If the function takes variable number of arguments, make a frame index for
3549 // the start of the first vararg value... for expansion of llvm.va_start. We
3550 // can skip this if there are no va_start calls.
3551 if (MFI.hasVAStart() &&
3552 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3553 CallConv != CallingConv::X86_ThisCall))) {
3554 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3555 }
3556
3557 // Figure out if XMM registers are in use.
3558 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3560, __PRETTY_FUNCTION__))
3559 F.hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3560, __PRETTY_FUNCTION__))
3560 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3560, __PRETTY_FUNCTION__))
;
3561
3562 // 64-bit calling conventions support varargs and register parameters, so we
3563 // have to do extra work to spill them in the prologue.
3564 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3565 // Find the first unallocated argument registers.
3566 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3567 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3568 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3569 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3570 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3571, __PRETTY_FUNCTION__))
3571 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3571, __PRETTY_FUNCTION__))
;
3572
3573 // Gather all the live in physical registers.
3574 SmallVector<SDValue, 6> LiveGPRs;
3575 SmallVector<SDValue, 8> LiveXMMRegs;
3576 SDValue ALVal;
3577 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3578 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3579 LiveGPRs.push_back(
3580 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3581 }
3582 if (!ArgXMMs.empty()) {
3583 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3584 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3585 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3586 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3587 LiveXMMRegs.push_back(
3588 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3589 }
3590 }
3591
3592 if (IsWin64) {
3593 // Get to the caller-allocated home save location. Add 8 to account
3594 // for the return address.
3595 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3596 FuncInfo->setRegSaveFrameIndex(
3597 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3598 // Fixup to set vararg frame on shadow area (4 x i64).
3599 if (NumIntRegs < 4)
3600 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3601 } else {
3602 // For X86-64, if there are vararg parameters that are passed via
3603 // registers, then we must store them to their spots on the stack so
3604 // they may be loaded by dereferencing the result of va_next.
3605 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3606 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3607 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3608 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3609 }
3610
3611 // Store the integer parameter registers.
3612 SmallVector<SDValue, 8> MemOps;
3613 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3614 getPointerTy(DAG.getDataLayout()));
3615 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3616 for (SDValue Val : LiveGPRs) {
3617 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3618 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3619 SDValue Store =
3620 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3621 MachinePointerInfo::getFixedStack(
3622 DAG.getMachineFunction(),
3623 FuncInfo->getRegSaveFrameIndex(), Offset));
3624 MemOps.push_back(Store);
3625 Offset += 8;
3626 }
3627
3628 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3629 // Now store the XMM (fp + vector) parameter registers.
3630 SmallVector<SDValue, 12> SaveXMMOps;
3631 SaveXMMOps.push_back(Chain);
3632 SaveXMMOps.push_back(ALVal);
3633 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3634 FuncInfo->getRegSaveFrameIndex(), dl));
3635 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3636 FuncInfo->getVarArgsFPOffset(), dl));
3637 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3638 LiveXMMRegs.end());
3639 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3640 MVT::Other, SaveXMMOps));
3641 }
3642
3643 if (!MemOps.empty())
3644 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3645 }
3646
3647 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3648 // Find the largest legal vector type.
3649 MVT VecVT = MVT::Other;
3650 // FIXME: Only some x86_32 calling conventions support AVX512.
3651 if (Subtarget.useAVX512Regs() &&
3652 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3653 CallConv == CallingConv::Intel_OCL_BI)))
3654 VecVT = MVT::v16f32;
3655 else if (Subtarget.hasAVX())
3656 VecVT = MVT::v8f32;
3657 else if (Subtarget.hasSSE2())
3658 VecVT = MVT::v4f32;
3659
3660 // We forward some GPRs and some vector types.
3661 SmallVector<MVT, 2> RegParmTypes;
3662 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3663 RegParmTypes.push_back(IntVT);
3664 if (VecVT != MVT::Other)
3665 RegParmTypes.push_back(VecVT);
3666
3667 // Compute the set of forwarded registers. The rest are scratch.
3668 SmallVectorImpl<ForwardedRegister> &Forwards =
3669 FuncInfo->getForwardedMustTailRegParms();
3670 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3671
3672 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3673 if (Is64Bit && !IsWin64 && !CCInfo.isAllocated(X86::AL)) {
3674 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3675 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3676 }
3677
3678 // Copy all forwards from physical to virtual registers.
3679 for (ForwardedRegister &FR : Forwards) {
3680 // FIXME: Can we use a less constrained schedule?
3681 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, FR.VReg, FR.VT);
3682 FR.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(FR.VT));
3683 Chain = DAG.getCopyToReg(Chain, dl, FR.VReg, RegVal);
3684 }
3685 }
3686
3687 // Some CCs need callee pop.
3688 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3689 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3690 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3691 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3692 // X86 interrupts must pop the error code (and the alignment padding) if
3693 // present.
3694 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3695 } else {
3696 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3697 // If this is an sret function, the return should pop the hidden pointer.
3698 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3699 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3700 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3701 FuncInfo->setBytesToPopOnReturn(4);
3702 }
3703
3704 if (!Is64Bit) {
3705 // RegSaveFrameIndex is X86-64 only.
3706 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3707 if (CallConv == CallingConv::X86_FastCall ||
3708 CallConv == CallingConv::X86_ThisCall)
3709 // fastcc functions can't have varargs.
3710 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3711 }
3712
3713 FuncInfo->setArgumentStackSize(StackSize);
3714
3715 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3716 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3717 if (Personality == EHPersonality::CoreCLR) {
3718 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3718, __PRETTY_FUNCTION__))
;
3719 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3720 // that we'd prefer this slot be allocated towards the bottom of the frame
3721 // (i.e. near the stack pointer after allocating the frame). Every
3722 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3723 // offset from the bottom of this and each funclet's frame must be the
3724 // same, so the size of funclets' (mostly empty) frames is dictated by
3725 // how far this slot is from the bottom (since they allocate just enough
3726 // space to accommodate holding this slot at the correct offset).
3727 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3728 EHInfo->PSPSymFrameIdx = PSPSymFI;
3729 }
3730 }
3731
3732 if (CallConv == CallingConv::X86_RegCall ||
3733 F.hasFnAttribute("no_caller_saved_registers")) {
3734 MachineRegisterInfo &MRI = MF.getRegInfo();
3735 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3736 MRI.disableCalleeSavedRegister(Pair.first);
3737 }
3738
3739 return Chain;
3740}
3741
3742SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3743 SDValue Arg, const SDLoc &dl,
3744 SelectionDAG &DAG,
3745 const CCValAssign &VA,
3746 ISD::ArgFlagsTy Flags) const {
3747 unsigned LocMemOffset = VA.getLocMemOffset();
3748 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3749 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3750 StackPtr, PtrOff);
3751 if (Flags.isByVal())
3752 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3753
3754 return DAG.getStore(
3755 Chain, dl, Arg, PtrOff,
3756 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3757}
3758
3759/// Emit a load of return address if tail call
3760/// optimization is performed and it is required.
3761SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3762 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3763 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3764 // Adjust the Return address stack slot.
3765 EVT VT = getPointerTy(DAG.getDataLayout());
3766 OutRetAddr = getReturnAddressFrameIndex(DAG);
3767
3768 // Load the "old" Return address.
3769 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3770 return SDValue(OutRetAddr.getNode(), 1);
3771}
3772
3773/// Emit a store of the return address if tail call
3774/// optimization is performed and it is required (FPDiff!=0).
3775static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3776 SDValue Chain, SDValue RetAddrFrIdx,
3777 EVT PtrVT, unsigned SlotSize,
3778 int FPDiff, const SDLoc &dl) {
3779 // Store the return address to the appropriate stack slot.
3780 if (!FPDiff) return Chain;
3781 // Calculate the new stack slot for the return address.
3782 int NewReturnAddrFI =
3783 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3784 false);
3785 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3786 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3787 MachinePointerInfo::getFixedStack(
3788 DAG.getMachineFunction(), NewReturnAddrFI));
3789 return Chain;
3790}
3791
3792/// Returns a vector_shuffle mask for an movs{s|d}, movd
3793/// operation of specified width.
3794static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3795 SDValue V2) {
3796 unsigned NumElems = VT.getVectorNumElements();
3797 SmallVector<int, 8> Mask;
3798 Mask.push_back(NumElems);
3799 for (unsigned i = 1; i != NumElems; ++i)
3800 Mask.push_back(i);
3801 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3802}
3803
3804SDValue
3805X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3806 SmallVectorImpl<SDValue> &InVals) const {
3807 SelectionDAG &DAG = CLI.DAG;
3808 SDLoc &dl = CLI.DL;
3809 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3810 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3811 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3812 SDValue Chain = CLI.Chain;
3813 SDValue Callee = CLI.Callee;
3814 CallingConv::ID CallConv = CLI.CallConv;
3815 bool &isTailCall = CLI.IsTailCall;
3816 bool isVarArg = CLI.IsVarArg;
3817
3818 MachineFunction &MF = DAG.getMachineFunction();
3819 bool Is64Bit = Subtarget.is64Bit();
3820 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3821 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3822 bool IsSibcall = false;
3823 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
3824 CallConv == CallingConv::Tail;
3825 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3826 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3827 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3828 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3829 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3830 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
3831 bool HasNoCfCheck =
3832 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3833 const Module *M = MF.getMMI().getModule();
3834 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3835
3836 MachineFunction::CallSiteInfo CSInfo;
3837
3838 if (CallConv == CallingConv::X86_INTR)
3839 report_fatal_error("X86 interrupts may not be called directly");
3840
3841 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
3842 // If we are using a GOT, disable tail calls to external symbols with
3843 // default visibility. Tail calling such a symbol requires using a GOT
3844 // relocation, which forces early binding of the symbol. This breaks code
3845 // that require lazy function symbol resolution. Using musttail or
3846 // GuaranteedTailCallOpt will override this.
3847 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3848 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3849 G->getGlobal()->hasDefaultVisibility()))
3850 isTailCall = false;
3851 }
3852
3853 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3854 if (IsMustTail) {
3855 // Force this to be a tail call. The verifier rules are enough to ensure
3856 // that we can lower this successfully without moving the return address
3857 // around.
3858 isTailCall = true;
3859 } else if (isTailCall) {
3860 // Check if it's really possible to do a tail call.
3861 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3862 isVarArg, SR != NotStructReturn,
3863 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3864 Outs, OutVals, Ins, DAG);
3865
3866 // Sibcalls are automatically detected tailcalls which do not require
3867 // ABI changes.
3868 if (!IsGuaranteeTCO && isTailCall)
3869 IsSibcall = true;
3870
3871 if (isTailCall)
3872 ++NumTailCalls;
3873 }
3874
3875 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3876, __PRETTY_FUNCTION__))
3876 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3876, __PRETTY_FUNCTION__))
;
3877
3878 // Analyze operands of the call, assigning locations to each operand.
3879 SmallVector<CCValAssign, 16> ArgLocs;
3880 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3881
3882 // Allocate shadow area for Win64.
3883 if (IsWin64)
3884 CCInfo.AllocateStack(32, 8);
3885
3886 CCInfo.AnalyzeArguments(Outs, CC_X86);
3887
3888 // In vectorcall calling convention a second pass is required for the HVA
3889 // types.
3890 if (CallingConv::X86_VectorCall == CallConv) {
3891 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3892 }
3893
3894 // Get a count of how many bytes are to be pushed on the stack.
3895 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3896 if (IsSibcall)
3897 // This is a sibcall. The memory operands are available in caller's
3898 // own caller's stack.
3899 NumBytes = 0;
3900 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
3901 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3902
3903 int FPDiff = 0;
3904 if (isTailCall && !IsSibcall && !IsMustTail) {
3905 // Lower arguments at fp - stackoffset + fpdiff.
3906 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3907
3908 FPDiff = NumBytesCallerPushed - NumBytes;
3909
3910 // Set the delta of movement of the returnaddr stackslot.
3911 // But only set if delta is greater than previous delta.
3912 if (FPDiff < X86Info->getTCReturnAddrDelta())
3913 X86Info->setTCReturnAddrDelta(FPDiff);
3914 }
3915
3916 unsigned NumBytesToPush = NumBytes;
3917 unsigned NumBytesToPop = NumBytes;
3918
3919 // If we have an inalloca argument, all stack space has already been allocated
3920 // for us and be right at the top of the stack. We don't support multiple
3921 // arguments passed in memory when using inalloca.
3922 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3923 NumBytesToPush = 0;
3924 if (!ArgLocs.back().isMemLoc())
3925 report_fatal_error("cannot use inalloca attribute on a register "
3926 "parameter");
3927 if (ArgLocs.back().getLocMemOffset() != 0)
3928 report_fatal_error("any parameter with the inalloca attribute must be "
3929 "the only memory argument");
3930 }
3931
3932 if (!IsSibcall && !IsMustTail)
3933 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3934 NumBytes - NumBytesToPush, dl);
3935
3936 SDValue RetAddrFrIdx;
3937 // Load return address for tail calls.
3938 if (isTailCall && FPDiff)
3939 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3940 Is64Bit, FPDiff, dl);
3941
3942 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3943 SmallVector<SDValue, 8> MemOpChains;
3944 SDValue StackPtr;
3945
3946 // The next loop assumes that the locations are in the same order of the
3947 // input arguments.
3948 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3949, __PRETTY_FUNCTION__))
3949 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3949, __PRETTY_FUNCTION__))
;
3950
3951 // Walk the register/memloc assignments, inserting copies/loads. In the case
3952 // of tail call optimization arguments are handle later.
3953 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3954 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3955 ++I, ++OutIndex) {
3956 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3956, __PRETTY_FUNCTION__))
;
3957 // Skip inalloca arguments, they have already been written.
3958 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3959 if (Flags.isInAlloca())
3960 continue;
3961
3962 CCValAssign &VA = ArgLocs[I];
3963 EVT RegVT = VA.getLocVT();
3964 SDValue Arg = OutVals[OutIndex];
3965 bool isByVal = Flags.isByVal();
3966
3967 // Promote the value if needed.
3968 switch (VA.getLocInfo()) {
3969 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3969)
;
3970 case CCValAssign::Full: break;
3971 case CCValAssign::SExt:
3972 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3973 break;
3974 case CCValAssign::ZExt:
3975 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3976 break;
3977 case CCValAssign::AExt:
3978 if (Arg.getValueType().isVector() &&
3979 Arg.getValueType().getVectorElementType() == MVT::i1)
3980 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3981 else if (RegVT.is128BitVector()) {
3982 // Special case: passing MMX values in XMM registers.
3983 Arg = DAG.getBitcast(MVT::i64, Arg);
3984 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3985 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3986 } else
3987 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3988 break;
3989 case CCValAssign::BCvt:
3990 Arg = DAG.getBitcast(RegVT, Arg);
3991 break;
3992 case CCValAssign::Indirect: {
3993 if (isByVal) {
3994 // Memcpy the argument to a temporary stack slot to prevent
3995 // the caller from seeing any modifications the callee may make
3996 // as guaranteed by the `byval` attribute.
3997 int FrameIdx = MF.getFrameInfo().CreateStackObject(
3998 Flags.getByValSize(),
3999 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4000 SDValue StackSlot =
4001 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4002 Chain =
4003 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4004 // From now on treat this as a regular pointer
4005 Arg = StackSlot;
4006 isByVal = false;
4007 } else {
4008 // Store the argument.
4009 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4010 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4011 Chain = DAG.getStore(
4012 Chain, dl, Arg, SpillSlot,
4013 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4014 Arg = SpillSlot;
4015 }
4016 break;
4017 }
4018 }
4019
4020 if (VA.needsCustom()) {
4021 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4022, __PRETTY_FUNCTION__))
4022 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4022, __PRETTY_FUNCTION__))
;
4023 // Split v64i1 value into two registers
4024 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4025 } else if (VA.isRegLoc()) {
4026 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4027 const TargetOptions &Options = DAG.getTarget().Options;
4028 if (Options.EnableDebugEntryValues)
4029 CSInfo.emplace_back(VA.getLocReg(), I);
4030 if (isVarArg && IsWin64) {
4031 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4032 // shadow reg if callee is a varargs function.
4033 unsigned ShadowReg = 0;
4034 switch (VA.getLocReg()) {
4035 case X86::XMM0: ShadowReg = X86::RCX; break;
4036 case X86::XMM1: ShadowReg = X86::RDX; break;
4037 case X86::XMM2: ShadowReg = X86::R8; break;
4038 case X86::XMM3: ShadowReg = X86::R9; break;
4039 }
4040 if (ShadowReg)
4041 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4042 }
4043 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4044 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4044, __PRETTY_FUNCTION__))
;
4045 if (!StackPtr.getNode())
4046 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4047 getPointerTy(DAG.getDataLayout()));
4048 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4049 dl, DAG, VA, Flags));
4050 }
4051 }
4052
4053 if (!MemOpChains.empty())
4054 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4055
4056 if (Subtarget.isPICStyleGOT()) {
4057 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4058 // GOT pointer.
4059 if (!isTailCall) {
4060 RegsToPass.push_back(std::make_pair(
4061 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4062 getPointerTy(DAG.getDataLayout()))));
4063 } else {
4064 // If we are tail calling and generating PIC/GOT style code load the
4065 // address of the callee into ECX. The value in ecx is used as target of
4066 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4067 // for tail calls on PIC/GOT architectures. Normally we would just put the
4068 // address of GOT into ebx and then call target@PLT. But for tail calls
4069 // ebx would be restored (since ebx is callee saved) before jumping to the
4070 // target@PLT.
4071
4072 // Note: The actual moving to ECX is done further down.
4073 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4074 if (G && !G->getGlobal()->hasLocalLinkage() &&
4075 G->getGlobal()->hasDefaultVisibility())
4076 Callee = LowerGlobalAddress(Callee, DAG);
4077 else if (isa<ExternalSymbolSDNode>(Callee))
4078 Callee = LowerExternalSymbol(Callee, DAG);
4079 }
4080 }
4081
4082 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
4083 // From AMD64 ABI document:
4084 // For calls that may call functions that use varargs or stdargs
4085 // (prototype-less calls or calls to functions containing ellipsis (...) in
4086 // the declaration) %al is used as hidden argument to specify the number
4087 // of SSE registers used. The contents of %al do not need to match exactly
4088 // the number of registers, but must be an ubound on the number of SSE
4089 // registers used and is in the range 0 - 8 inclusive.
4090
4091 // Count the number of XMM registers allocated.
4092 static const MCPhysReg XMMArgRegs[] = {
4093 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4094 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4095 };
4096 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4097 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4098, __PRETTY_FUNCTION__))
4098 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4098, __PRETTY_FUNCTION__))
;
4099
4100 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
4101 DAG.getConstant(NumXMMRegs, dl,
4102 MVT::i8)));
4103 }
4104
4105 if (isVarArg && IsMustTail) {
4106 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4107 for (const auto &F : Forwards) {
4108 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4109 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
4110 }
4111 }
4112
4113 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4114 // don't need this because the eligibility check rejects calls that require
4115 // shuffling arguments passed in memory.
4116 if (!IsSibcall && isTailCall) {
4117 // Force all the incoming stack arguments to be loaded from the stack
4118 // before any new outgoing arguments are stored to the stack, because the
4119 // outgoing stack slots may alias the incoming argument stack slots, and
4120 // the alias isn't otherwise explicit. This is slightly more conservative
4121 // than necessary, because it means that each store effectively depends
4122 // on every argument instead of just those arguments it would clobber.
4123 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4124
4125 SmallVector<SDValue, 8> MemOpChains2;
4126 SDValue FIN;
4127 int FI = 0;
4128 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4129 ++I, ++OutsIndex) {
4130 CCValAssign &VA = ArgLocs[I];
4131
4132 if (VA.isRegLoc()) {
4133 if (VA.needsCustom()) {
4134 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4135, __PRETTY_FUNCTION__))
4135 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4135, __PRETTY_FUNCTION__))
;
4136 // This means that we are in special case where one argument was
4137 // passed through two register locations - Skip the next location
4138 ++I;
4139 }
4140
4141 continue;
4142 }
4143
4144 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4144, __PRETTY_FUNCTION__))
;
4145 SDValue Arg = OutVals[OutsIndex];
4146 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4147 // Skip inalloca arguments. They don't require any work.
4148 if (Flags.isInAlloca())
4149 continue;
4150 // Create frame index.
4151 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4152 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4153 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4154 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4155
4156 if (Flags.isByVal()) {
4157 // Copy relative to framepointer.
4158 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4159 if (!StackPtr.getNode())
4160 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4161 getPointerTy(DAG.getDataLayout()));
4162 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4163 StackPtr, Source);
4164
4165 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4166 ArgChain,
4167 Flags, DAG, dl));
4168 } else {
4169 // Store relative to framepointer.
4170 MemOpChains2.push_back(DAG.getStore(
4171 ArgChain, dl, Arg, FIN,
4172 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4173 }
4174 }
4175
4176 if (!MemOpChains2.empty())
4177 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4178
4179 // Store the return address to the appropriate stack slot.
4180 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4181 getPointerTy(DAG.getDataLayout()),
4182 RegInfo->getSlotSize(), FPDiff, dl);
4183 }
4184
4185 // Build a sequence of copy-to-reg nodes chained together with token chain
4186 // and flag operands which copy the outgoing args into registers.
4187 SDValue InFlag;
4188 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4189 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4190 RegsToPass[i].second, InFlag);
4191 InFlag = Chain.getValue(1);
4192 }
4193
4194 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4195 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4195, __PRETTY_FUNCTION__))
;
4196 // In the 64-bit large code model, we have to make all calls
4197 // through a register, since the call instruction's 32-bit
4198 // pc-relative offset may not be large enough to hold the whole
4199 // address.
4200 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4201 Callee->getOpcode() == ISD::ExternalSymbol) {
4202 // Lower direct calls to global addresses and external symbols. Setting
4203 // ForCall to true here has the effect of removing WrapperRIP when possible
4204 // to allow direct calls to be selected without first materializing the
4205 // address into a register.
4206 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4207 } else if (Subtarget.isTarget64BitILP32() &&
4208 Callee->getValueType(0) == MVT::i32) {
4209 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4210 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4211 }
4212
4213 // Returns a chain & a flag for retval copy to use.
4214 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4215 SmallVector<SDValue, 8> Ops;
4216
4217 if (!IsSibcall && isTailCall && !IsMustTail) {
4218 Chain = DAG.getCALLSEQ_END(Chain,
4219 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4220 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4221 InFlag = Chain.getValue(1);
4222 }
4223
4224 Ops.push_back(Chain);
4225 Ops.push_back(Callee);
4226
4227 if (isTailCall)
4228 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
4229
4230 // Add argument registers to the end of the list so that they are known live
4231 // into the call.
4232 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4233 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4234 RegsToPass[i].second.getValueType()));
4235
4236 // Add a register mask operand representing the call-preserved registers.
4237 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
4238 // set X86_INTR calling convention because it has the same CSR mask
4239 // (same preserved registers).
4240 const uint32_t *Mask = RegInfo->getCallPreservedMask(
4241 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
4242 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4242, __PRETTY_FUNCTION__))
;
4243
4244 // If this is an invoke in a 32-bit function using a funclet-based
4245 // personality, assume the function clobbers all registers. If an exception
4246 // is thrown, the runtime will not restore CSRs.
4247 // FIXME: Model this more precisely so that we can register allocate across
4248 // the normal edge and spill and fill across the exceptional edge.
4249 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
4250 const Function &CallerFn = MF.getFunction();
4251 EHPersonality Pers =
4252 CallerFn.hasPersonalityFn()
4253 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4254 : EHPersonality::Unknown;
4255 if (isFuncletEHPersonality(Pers))
4256 Mask = RegInfo->getNoPreservedMask();
4257 }
4258
4259 // Define a new register mask from the existing mask.
4260 uint32_t *RegMask = nullptr;
4261
4262 // In some calling conventions we need to remove the used physical registers
4263 // from the reg mask.
4264 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4265 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4266
4267 // Allocate a new Reg Mask and copy Mask.
4268 RegMask = MF.allocateRegMask();
4269 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4270 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4271
4272 // Make sure all sub registers of the argument registers are reset
4273 // in the RegMask.
4274 for (auto const &RegPair : RegsToPass)
4275 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4276 SubRegs.isValid(); ++SubRegs)
4277 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4278
4279 // Create the RegMask Operand according to our updated mask.
4280 Ops.push_back(DAG.getRegisterMask(RegMask));
4281 } else {
4282 // Create the RegMask Operand according to the static mask.
4283 Ops.push_back(DAG.getRegisterMask(Mask));
4284 }
4285
4286 if (InFlag.getNode())
4287 Ops.push_back(InFlag);
4288
4289 if (isTailCall) {
4290 // We used to do:
4291 //// If this is the first return lowered for this function, add the regs
4292 //// to the liveout set for the function.
4293 // This isn't right, although it's probably harmless on x86; liveouts
4294 // should be computed from returns not tail calls. Consider a void
4295 // function making a tail call to a function returning int.
4296 MF.getFrameInfo().setHasTailCall();
4297 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4298 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4299 return Ret;
4300 }
4301
4302 if (HasNoCfCheck && IsCFProtectionSupported) {
4303 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4304 } else {
4305 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4306 }
4307 InFlag = Chain.getValue(1);
4308 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4309
4310 // Save heapallocsite metadata.
4311 if (CLI.CS)
4312 if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite"))
4313 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4314
4315 // Create the CALLSEQ_END node.
4316 unsigned NumBytesForCalleeToPop;
4317 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4318 DAG.getTarget().Options.GuaranteedTailCallOpt))
4319 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4320 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4321 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4322 SR == StackStructReturn)
4323 // If this is a call to a struct-return function, the callee
4324 // pops the hidden struct pointer, so we have to push it back.
4325 // This is common for Darwin/X86, Linux & Mingw32 targets.
4326 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4327 NumBytesForCalleeToPop = 4;
4328 else
4329 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4330
4331 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
4332 // No need to reset the stack after the call if the call doesn't return. To
4333 // make the MI verify, we'll pretend the callee does it for us.
4334 NumBytesForCalleeToPop = NumBytes;
4335 }
4336
4337 // Returns a flag for retval copy to use.
4338 if (!IsSibcall) {
4339 Chain = DAG.getCALLSEQ_END(Chain,
4340 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4341 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4342 true),
4343 InFlag, dl);
4344 InFlag = Chain.getValue(1);
4345 }
4346
4347 // Handle result values, copying them out of physregs into vregs that we
4348 // return.
4349 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4350 InVals, RegMask);
4351}
4352
4353//===----------------------------------------------------------------------===//
4354// Fast Calling Convention (tail call) implementation
4355//===----------------------------------------------------------------------===//
4356
4357// Like the stdcall convention, the callee cleans up the arguments, except that
4358// ECX is reserved for storing the tail called function address. Only 2
4359// registers are free for argument passing (inreg). Tail call optimization is performed
4360// provided:
4361// * tailcallopt is enabled
4362// * caller/callee are fastcc
4363// On X86_64 architecture with GOT-style position independent code only local
4364// (within module) calls are supported at the moment.
4365// To keep the stack aligned according to platform abi the function
4366// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4367// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
4368// If a tail called function callee has more arguments than the caller the
4369// caller needs to make sure that there is room to move the RETADDR to. This is
4370// achieved by reserving an area the size of the argument delta right after the
4371// original RETADDR, but before the saved framepointer or the spilled registers
4372// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4373// stack layout:
4374// arg1
4375// arg2
4376// RETADDR
4377// [ new RETADDR
4378// move area ]
4379// (possible EBP)
4380// ESI
4381// EDI
4382// local1 ..
4383
4384/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4385/// requirement.
4386unsigned
4387X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4388 SelectionDAG &DAG) const {
4389 const Align StackAlignment(Subtarget.getFrameLowering()->getStackAlignment());
4390 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4391 assert(StackSize % SlotSize == 0 &&((StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize"
) ? static_cast<void> (0) : __assert_fail ("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4392, __PRETTY_FUNCTION__))
4392 "StackSize must be a multiple of SlotSize")((StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize"
) ? static_cast<void> (0) : __assert_fail ("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4392, __PRETTY_FUNCTION__))
;
4393 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4394}
4395
4396/// Return true if the given stack call argument is already available in the
4397/// same position (relatively) of the caller's incoming argument stack.
4398static
4399bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4400 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4401 const X86InstrInfo *TII, const CCValAssign &VA) {
4402 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4403
4404 for (;;) {
4405 // Look through nodes that don't alter the bits of the incoming value.
4406 unsigned Op = Arg.getOpcode();
4407 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4408 Arg = Arg.getOperand(0);
4409 continue;
4410 }
4411 if (Op == ISD::TRUNCATE) {
4412 const SDValue &TruncInput = Arg.getOperand(0);
4413 if (TruncInput.getOpcode() == ISD::AssertZext &&
4414 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4415 Arg.getValueType()) {
4416 Arg = TruncInput.getOperand(0);
4417 continue;
4418 }
4419 }
4420 break;
4421 }
4422
4423 int FI = INT_MAX2147483647;
4424 if (Arg.getOpcode() == ISD::CopyFromReg) {
4425 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4426 if (!Register::isVirtualRegister(VR))
4427 return false;
4428 MachineInstr *Def = MRI->getVRegDef(VR);
4429 if (!Def)
4430 return false;
4431 if (!Flags.isByVal()) {
4432 if (!TII->isLoadFromStackSlot(*Def, FI))
4433 return false;
4434 } else {
4435 unsigned Opcode = Def->getOpcode();
4436 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4437 Opcode == X86::LEA64_32r) &&
4438 Def->getOperand(1).isFI()) {
4439 FI = Def->getOperand(1).getIndex();
4440 Bytes = Flags.getByValSize();
4441 } else
4442 return false;
4443 }
4444 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4445 if (Flags.isByVal())
4446 // ByVal argument is passed in as a pointer but it's now being
4447 // dereferenced. e.g.
4448 // define @foo(%struct.X* %A) {
4449 // tail call @bar(%struct.X* byval %A)
4450 // }
4451 return false;
4452 SDValue Ptr = Ld->getBasePtr();
4453 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4454 if (!FINode)
4455 return false;
4456 FI = FINode->getIndex();
4457 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4458 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4459 FI = FINode->getIndex();
4460 Bytes = Flags.getByValSize();
4461 } else
4462 return false;
4463
4464 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4464, __PRETTY_FUNCTION__))
;
4465 if (!MFI.isFixedObjectIndex(FI))
4466 return false;
4467
4468 if (Offset != MFI.getObjectOffset(FI))
4469 return false;
4470
4471 // If this is not byval, check that the argument stack object is immutable.
4472 // inalloca and argument copy elision can create mutable argument stack
4473 // objects. Byval objects can be mutated, but a byval call intends to pass the
4474 // mutated memory.
4475 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4476 return false;
4477
4478 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4479 // If the argument location is wider than the argument type, check that any
4480 // extension flags match.
4481 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4482 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4483 return false;
4484 }
4485 }
4486
4487 return Bytes == MFI.getObjectSize(FI);
4488}
4489
4490/// Check whether the call is eligible for tail call optimization. Targets
4491/// that want to do tail call optimization should implement this function.
4492bool X86TargetLowering::IsEligibleForTailCallOptimization(
4493 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4494 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4495 const SmallVectorImpl<ISD::OutputArg> &Outs,
4496 const SmallVectorImpl<SDValue> &OutVals,
4497 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4498 if (!mayTailCallThisCC(CalleeCC))
4499 return false;
4500
4501 // If -tailcallopt is specified, make fastcc functions tail-callable.
4502 MachineFunction &MF = DAG.getMachineFunction();
4503 const Function &CallerF = MF.getFunction();
4504
4505 // If the function return type is x86_fp80 and the callee return type is not,
4506 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4507 // perform a tailcall optimization here.
4508 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4509 return false;
4510
4511 CallingConv::ID CallerCC = CallerF.getCallingConv();
4512 bool CCMatch = CallerCC == CalleeCC;
4513 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4514 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4515 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4516 CalleeCC == CallingConv::Tail;
4517
4518 // Win64 functions have extra shadow space for argument homing. Don't do the
4519 // sibcall if the caller and callee have mismatched expectations for this
4520 // space.
4521 if (IsCalleeWin64 != IsCallerWin64)
4522 return false;
4523
4524 if (IsGuaranteeTCO) {
4525 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4526 return true;
4527 return false;
4528 }
4529
4530 // Look for obvious safe cases to perform tail call optimization that do not
4531 // require ABI changes. This is what gcc calls sibcall.
4532
4533 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4534 // emit a special epilogue.
4535 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4536 if (RegInfo->needsStackRealignment(MF))
4537 return false;
4538
4539 // Also avoid sibcall optimization if either caller or callee uses struct
4540 // return semantics.
4541 if (isCalleeStructRet || isCallerStructRet)
4542 return false;
4543
4544 // Do not sibcall optimize vararg calls unless all arguments are passed via
4545 // registers.
4546 LLVMContext &C = *DAG.getContext();
4547 if (isVarArg && !Outs.empty()) {
4548 // Optimizing for varargs on Win64 is unlikely to be safe without
4549 // additional testing.
4550 if (IsCalleeWin64 || IsCallerWin64)
4551 return false;
4552
4553 SmallVector<CCValAssign, 16> ArgLocs;
4554 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4555
4556 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4557 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4558 if (!ArgLocs[i].isRegLoc())
4559 return false;
4560 }
4561
4562 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4563 // stack. Therefore, if it's not used by the call it is not safe to optimize
4564 // this into a sibcall.
4565 bool Unused = false;
4566 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4567 if (!Ins[i].Used) {
4568 Unused = true;
4569 break;
4570 }
4571 }
4572 if (Unused) {
4573 SmallVector<CCValAssign, 16> RVLocs;
4574 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4575 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4576 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4577 CCValAssign &VA = RVLocs[i];
4578 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4579 return false;
4580 }
4581 }
4582
4583 // Check that the call results are passed in the same way.
4584 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4585 RetCC_X86, RetCC_X86))
4586 return false;
4587 // The callee has to preserve all registers the caller needs to preserve.
4588 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4589 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4590 if (!CCMatch) {
4591 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4592 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4593 return false;
4594 }
4595
4596 unsigned StackArgsSize = 0;
4597
4598 // If the callee takes no arguments then go on to check the results of the
4599 // call.
4600 if (!Outs.empty()) {
4601 // Check if stack adjustment is needed. For now, do not do this if any
4602 // argument is passed on the stack.
4603 SmallVector<CCValAssign, 16> ArgLocs;
4604 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4605
4606 // Allocate shadow area for Win64
4607 if (IsCalleeWin64)
4608 CCInfo.AllocateStack(32, 8);
4609
4610 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4611 StackArgsSize = CCInfo.getNextStackOffset();
4612
4613 if (CCInfo.getNextStackOffset()) {
4614 // Check if the arguments are already laid out in the right way as
4615 // the caller's fixed stack objects.
4616 MachineFrameInfo &MFI = MF.getFrameInfo();
4617 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4618 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4619 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4620 CCValAssign &VA = ArgLocs[i];
4621 SDValue Arg = OutVals[i];
4622 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4623 if (VA.getLocInfo() == CCValAssign::Indirect)
4624 return false;
4625 if (!VA.isRegLoc()) {
4626 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4627 MFI, MRI, TII, VA))
4628 return false;
4629 }
4630 }
4631 }
4632
4633 bool PositionIndependent = isPositionIndependent();
4634 // If the tailcall address may be in a register, then make sure it's
4635 // possible to register allocate for it. In 32-bit, the call address can
4636 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4637 // callee-saved registers are restored. These happen to be the same
4638 // registers used to pass 'inreg' arguments so watch out for those.
4639 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4640 !isa<ExternalSymbolSDNode>(Callee)) ||
4641 PositionIndependent)) {
4642 unsigned NumInRegs = 0;
4643 // In PIC we need an extra register to formulate the address computation
4644 // for the callee.
4645 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4646
4647 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4648 CCValAssign &VA = ArgLocs[i];
4649 if (!VA.isRegLoc())
4650 continue;
4651 Register Reg = VA.getLocReg();
4652 switch (Reg) {
4653 default: break;
4654 case X86::EAX: case X86::EDX: case X86::ECX:
4655 if (++NumInRegs == MaxInRegs)
4656 return false;
4657 break;
4658 }
4659 }
4660 }
4661
4662 const MachineRegisterInfo &MRI = MF.getRegInfo();
4663 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4664 return false;
4665 }
4666
4667 bool CalleeWillPop =
4668 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4669 MF.getTarget().Options.GuaranteedTailCallOpt);
4670
4671 if (unsigned BytesToPop =
4672 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4673 // If we have bytes to pop, the callee must pop them.
4674 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4675 if (!CalleePopMatches)
4676 return false;
4677 } else if (CalleeWillPop && StackArgsSize > 0) {
4678 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4679 return false;
4680 }
4681
4682 return true;
4683}
4684
4685FastISel *
4686X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4687 const TargetLibraryInfo *libInfo) const {
4688 return X86::createFastISel(funcInfo, libInfo);
4689}
4690
4691//===----------------------------------------------------------------------===//
4692// Other Lowering Hooks
4693//===----------------------------------------------------------------------===//
4694
4695static bool MayFoldLoad(SDValue Op) {
4696 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4697}
4698
4699static bool MayFoldIntoStore(SDValue Op) {
4700 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4701}
4702
4703static bool MayFoldIntoZeroExtend(SDValue Op) {
4704 if (Op.hasOneUse()) {
4705 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4706 return (ISD::ZERO_EXTEND == Opcode);
4707 }
4708 return false;
4709}
4710
4711static bool isTargetShuffle(unsigned Opcode) {
4712 switch(Opcode) {
4713 default: return false;
4714 case X86ISD::BLENDI:
4715 case X86ISD::PSHUFB:
4716 case X86ISD::PSHUFD:
4717 case X86ISD::PSHUFHW:
4718 case X86ISD::PSHUFLW:
4719 case X86ISD::SHUFP:
4720 case X86ISD::INSERTPS:
4721 case X86ISD::EXTRQI:
4722 case X86ISD::INSERTQI:
4723 case X86ISD::PALIGNR:
4724 case X86ISD::VSHLDQ:
4725 case X86ISD::VSRLDQ:
4726 case X86ISD::MOVLHPS:
4727 case X86ISD::MOVHLPS:
4728 case X86ISD::MOVSHDUP:
4729 case X86ISD::MOVSLDUP:
4730 case X86ISD::MOVDDUP:
4731 case X86ISD::MOVSS:
4732 case X86ISD::MOVSD:
4733 case X86ISD::UNPCKL:
4734 case X86ISD::UNPCKH:
4735 case X86ISD::VBROADCAST:
4736 case X86ISD::VPERMILPI:
4737 case X86ISD::VPERMILPV:
4738 case X86ISD::VPERM2X128:
4739 case X86ISD::SHUF128:
4740 case X86ISD::VPERMIL2:
4741 case X86ISD::VPERMI:
4742 case X86ISD::VPPERM:
4743 case X86ISD::VPERMV:
4744 case X86ISD::VPERMV3:
4745 case X86ISD::VZEXT_MOVL:
4746 return true;
4747 }
4748}
4749
4750static bool isTargetShuffleVariableMask(unsigned Opcode) {
4751 switch (Opcode) {
4752 default: return false;
4753 // Target Shuffles.
4754 case X86ISD::PSHUFB:
4755 case X86ISD::VPERMILPV:
4756 case X86ISD::VPERMIL2:
4757 case X86ISD::VPPERM:
4758 case X86ISD::VPERMV:
4759 case X86ISD::VPERMV3:
4760 return true;
4761 // 'Faux' Target Shuffles.
4762 case ISD::OR:
4763 case ISD::AND:
4764 case X86ISD::ANDNP:
4765 return true;
4766 }
4767}
4768
4769SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4770 MachineFunction &MF = DAG.getMachineFunction();
4771 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4772 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4773 int ReturnAddrIndex = FuncInfo->getRAIndex();
4774
4775 if (ReturnAddrIndex == 0) {
4776 // Set up a frame object for the return address.
4777 unsigned SlotSize = RegInfo->getSlotSize();
4778 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4779 -(int64_t)SlotSize,
4780 false);
4781 FuncInfo->setRAIndex(ReturnAddrIndex);
4782 }
4783
4784 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4785}
4786
4787bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4788 bool hasSymbolicDisplacement) {
4789 // Offset should fit into 32 bit immediate field.
4790 if (!isInt<32>(Offset))
4791 return false;
4792
4793 // If we don't have a symbolic displacement - we don't have any extra
4794 // restrictions.
4795 if (!hasSymbolicDisplacement)
4796 return true;
4797
4798 // FIXME: Some tweaks might be needed for medium code model.
4799 if (M != CodeModel::Small && M != CodeModel::Kernel)
4800 return false;
4801
4802 // For small code model we assume that latest object is 16MB before end of 31
4803 // bits boundary. We may also accept pretty large negative constants knowing
4804 // that all objects are in the positive half of address space.
4805 if (M == CodeModel::Small && Offset < 16*1024*1024)
4806 return true;
4807
4808 // For kernel code model we know that all object resist in the negative half
4809 // of 32bits address space. We may not accept negative offsets, since they may
4810 // be just off and we may accept pretty large positive ones.
4811 if (M == CodeModel::Kernel && Offset >= 0)
4812 return true;
4813
4814 return false;
4815}
4816
4817/// Determines whether the callee is required to pop its own arguments.
4818/// Callee pop is necessary to support tail calls.
4819bool X86::isCalleePop(CallingConv::ID CallingConv,
4820 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4821 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4822 // can guarantee TCO.
4823 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4824 return true;
4825
4826 switch (CallingConv) {
4827 default:
4828 return false;
4829 case CallingConv::X86_StdCall:
4830 case CallingConv::X86_FastCall:
4831 case CallingConv::X86_ThisCall:
4832 case CallingConv::X86_VectorCall:
4833 return !is64Bit;
4834 }
4835}
4836
4837/// Return true if the condition is an signed comparison operation.
4838static bool isX86CCSigned(unsigned X86CC) {
4839 switch (X86CC) {
4840 default:
4841 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4841)
;
4842 case X86::COND_E:
4843 case X86::COND_NE:
4844 case X86::COND_B:
4845 case X86::COND_A:
4846 case X86::COND_BE:
4847 case X86::COND_AE:
4848 return false;
4849 case X86::COND_G:
4850 case X86::COND_GE:
4851 case X86::COND_L:
4852 case X86::COND_LE:
4853 return true;
4854 }
4855}
4856
4857static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4858 switch (SetCCOpcode) {
4859 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4859)
;
4860 case ISD::SETEQ: return X86::COND_E;
4861 case ISD::SETGT: return X86::COND_G;
4862 case ISD::SETGE: return X86::COND_GE;
4863 case ISD::SETLT: return X86::COND_L;
4864 case ISD::SETLE: return X86::COND_LE;
4865 case ISD::SETNE: return X86::COND_NE;
4866 case ISD::SETULT: return X86::COND_B;
4867 case ISD::SETUGT: return X86::COND_A;
4868 case ISD::SETULE: return X86::COND_BE;
4869 case ISD::SETUGE: return X86::COND_AE;
4870 }
4871}
4872
4873/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4874/// condition code, returning the condition code and the LHS/RHS of the
4875/// comparison to make.
4876static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4877 bool isFP, SDValue &LHS, SDValue &RHS,
4878 SelectionDAG &DAG) {
4879 if (!isFP) {
4880 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4881 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4882 // X > -1 -> X == 0, jump !sign.
4883 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4884 return X86::COND_NS;
4885 }
4886 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4887 // X < 0 -> X == 0, jump on sign.
4888 return X86::COND_S;
4889 }
4890 if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
4891 // X >= 0 -> X == 0, jump on !sign.
4892 return X86::COND_NS;
4893 }
4894 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
4895 // X < 1 -> X <= 0
4896 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4897 return X86::COND_LE;
4898 }
4899 }
4900
4901 return TranslateIntegerX86CC(SetCCOpcode);
4902 }
4903
4904 // First determine if it is required or is profitable to flip the operands.
4905
4906 // If LHS is a foldable load, but RHS is not, flip the condition.
4907 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4908 !ISD::isNON_EXTLoad(RHS.getNode())) {
4909 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4910 std::swap(LHS, RHS);
4911 }
4912
4913 switch (SetCCOpcode) {
4914 default: break;
4915 case ISD::SETOLT:
4916 case ISD::SETOLE:
4917 case ISD::SETUGT:
4918 case ISD::SETUGE:
4919 std::swap(LHS, RHS);
4920 break;
4921 }
4922
4923 // On a floating point condition, the flags are set as follows:
4924 // ZF PF CF op
4925 // 0 | 0 | 0 | X > Y
4926 // 0 | 0 | 1 | X < Y
4927 // 1 | 0 | 0 | X == Y
4928 // 1 | 1 | 1 | unordered
4929 switch (SetCCOpcode) {
4930 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4930)
;
4931 case ISD::SETUEQ:
4932 case ISD::SETEQ: return X86::COND_E;
4933 case ISD::SETOLT: // flipped
4934 case ISD::SETOGT:
4935 case ISD::SETGT: return X86::COND_A;
4936 case ISD::SETOLE: // flipped
4937 case ISD::SETOGE:
4938 case ISD::SETGE: return X86::COND_AE;
4939 case ISD::SETUGT: // flipped
4940 case ISD::SETULT:
4941 case ISD::SETLT: return X86::COND_B;
4942 case ISD::SETUGE: // flipped
4943 case ISD::SETULE:
4944 case ISD::SETLE: return X86::COND_BE;
4945 case ISD::SETONE:
4946 case ISD::SETNE: return X86::COND_NE;
4947 case ISD::SETUO: return X86::COND_P;
4948 case ISD::SETO: return X86::COND_NP;
4949 case ISD::SETOEQ:
4950 case ISD::SETUNE: return X86::COND_INVALID;
4951 }
4952}
4953
4954/// Is there a floating point cmov for the specific X86 condition code?
4955/// Current x86 isa includes the following FP cmov instructions:
4956/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4957static bool hasFPCMov(unsigned X86CC) {
4958 switch (X86CC) {
4959 default:
4960 return false;
4961 case X86::COND_B:
4962 case X86::COND_BE:
4963 case X86::COND_E:
4964 case X86::COND_P:
4965 case X86::COND_A:
4966 case X86::COND_AE:
4967 case X86::COND_NE:
4968 case X86::COND_NP:
4969 return true;
4970 }
4971}
4972
4973
4974bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4975 const CallInst &I,
4976 MachineFunction &MF,
4977 unsigned Intrinsic) const {
4978
4979 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4980 if (!IntrData)
4981 return false;
4982
4983 Info.flags = MachineMemOperand::MONone;
4984 Info.offset = 0;
4985
4986 switch (IntrData->Type) {
4987 case TRUNCATE_TO_MEM_VI8:
4988 case TRUNCATE_TO_MEM_VI16:
4989 case TRUNCATE_TO_MEM_VI32: {
4990 Info.opc = ISD::INTRINSIC_VOID;
4991 Info.ptrVal = I.getArgOperand(0);
4992 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4993 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4994 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4995 ScalarVT = MVT::i8;
4996 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4997 ScalarVT = MVT::i16;
4998 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4999 ScalarVT = MVT::i32;
5000
5001 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5002 Info.align = Align(1);
5003 Info.flags |= MachineMemOperand::MOStore;
5004 break;
5005 }
5006 case GATHER:
5007 case GATHER_AVX2: {
5008 Info.opc = ISD::INTRINSIC_W_CHAIN;
5009 Info.ptrVal = nullptr;
5010 MVT DataVT = MVT::getVT(I.getType());
5011 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5012 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5013 IndexVT.getVectorNumElements());
5014 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5015 Info.align = Align(1);
5016 Info.flags |= MachineMemOperand::MOLoad;
5017 break;
5018 }
5019 case SCATTER: {
5020 Info.opc = ISD::INTRINSIC_VOID;
5021 Info.ptrVal = nullptr;
5022 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5023 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5024 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5025 IndexVT.getVectorNumElements());
5026 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5027 Info.align = Align(1);
5028 Info.flags |= MachineMemOperand::MOStore;
5029 break;
5030 }
5031 default:
5032 return false;
5033 }
5034
5035 return true;
5036}
5037
5038/// Returns true if the target can instruction select the
5039/// specified FP immediate natively. If false, the legalizer will
5040/// materialize the FP immediate as a load from a constant pool.
5041bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5042 bool ForCodeSize) const {
5043 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
5044 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
5045 return true;
5046 }
5047 return false;
5048}
5049
5050bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5051 ISD::LoadExtType ExtTy,
5052 EVT NewVT) const {
5053 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")((cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow"
) ? static_cast<void> (0) : __assert_fail ("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5053, __PRETTY_FUNCTION__))
;
5054
5055 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5056 // relocation target a movq or addq instruction: don't let the load shrink.
5057 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5058 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5059 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5060 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5061
5062 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5063 // those uses are extracted directly into a store, then the extract + store
5064 // can be store-folded. Therefore, it's probably not worth splitting the load.
5065 EVT VT = Load->getValueType(0);
5066 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5067 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5068 // Skip uses of the chain value. Result 0 of the node is the load value.
5069 if (UI.getUse().getResNo() != 0)
5070 continue;
5071
5072 // If this use is not an extract + store, it's probably worth splitting.
5073 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5074 UI->use_begin()->getOpcode() != ISD::STORE)
5075 return true;
5076 }
5077 // All non-chain uses are extract + store.
5078 return false;
5079 }
5080
5081 return true;
5082}
5083
5084/// Returns true if it is beneficial to convert a load of a constant
5085/// to just the constant itself.
5086bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5087 Type *Ty) const {
5088 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5088, __PRETTY_FUNCTION__))
;
5089
5090 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5091 if (BitSize == 0 || BitSize > 64)
5092 return false;
5093 return true;
5094}
5095
5096bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5097 // If we are using XMM registers in the ABI and the condition of the select is
5098 // a floating-point compare and we have blendv or conditional move, then it is
5099 // cheaper to select instead of doing a cross-register move and creating a
5100 // load that depends on the compare result.
5101 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5102 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5103}
5104
5105bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5106 // TODO: It might be a win to ease or lift this restriction, but the generic
5107 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5108 if (VT.isVector() && Subtarget.hasAVX512())
5109 return false;
5110
5111 return true;
5112}
5113
5114bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5115 SDValue C) const {
5116 // TODO: We handle scalars using custom code, but generic combining could make
5117 // that unnecessary.
5118 APInt MulC;
5119 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5120 return false;
5121
5122 // Find the type this will be legalized too. Otherwise we might prematurely
5123 // convert this to shl+add/sub and then still have to type legalize those ops.
5124 // Another choice would be to defer the decision for illegal types until
5125 // after type legalization. But constant splat vectors of i64 can't make it
5126 // through type legalization on 32-bit targets so we would need to special
5127 // case vXi64.
5128 while (getTypeAction(Context, VT) != TypeLegal)
5129 VT = getTypeToTransformTo(Context, VT);
5130
5131 // If vector multiply is legal, assume that's faster than shl + add/sub.
5132 // TODO: Multiply is a complex op with higher latency and lower throughput in
5133 // most implementations, so this check could be loosened based on type
5134 // and/or a CPU attribute.
5135 if (isOperationLegal(ISD::MUL, VT))
5136 return false;
5137
5138 // shl+add, shl+sub, shl+add+neg
5139 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5140 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5141}
5142
5143bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5144 unsigned Index) const {
5145 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5146 return false;
5147
5148 // Mask vectors support all subregister combinations and operations that
5149 // extract half of vector.
5150 if (ResVT.getVectorElementType() == MVT::i1)
5151 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5152 (Index == ResVT.getVectorNumElements()));
5153
5154 return (Index % ResVT.getVectorNumElements()) == 0;
5155}
5156
5157bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5158 unsigned Opc = VecOp.getOpcode();
5159
5160 // Assume target opcodes can't be scalarized.
5161 // TODO - do we have any exceptions?
5162 if (Opc >= ISD::BUILTIN_OP_END)
5163 return false;
5164
5165 // If the vector op is not supported, try to convert to scalar.
5166 EVT VecVT = VecOp.getValueType();
5167 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5168 return true;
5169
5170 // If the vector op is supported, but the scalar op is not, the transform may
5171 // not be worthwhile.
5172 EVT ScalarVT = VecVT.getScalarType();
5173 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5174}
5175
5176bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5177 bool) const {
5178 // TODO: Allow vectors?
5179 if (VT.isVector())
5180 return false;
5181 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5182}
5183
5184bool X86TargetLowering::isCheapToSpeculateCttz() const {
5185 // Speculate cttz only if we can directly use TZCNT.
5186 return Subtarget.hasBMI();
5187}
5188
5189bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5190 // Speculate ctlz only if we can directly use LZCNT.
5191 return Subtarget.hasLZCNT();
5192}
5193
5194bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5195 const SelectionDAG &DAG,
5196 const MachineMemOperand &MMO) const {
5197 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5198 BitcastVT.getVectorElementType() == MVT::i1)
5199 return false;
5200
5201 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5202 return false;
5203
5204 // If both types are legal vectors, it's always ok to convert them.
5205 if (LoadVT.isVector() && BitcastVT.isVector() &&
5206 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5207 return true;
5208
5209 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5210}
5211
5212bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5213 const SelectionDAG &DAG) const {
5214 // Do not merge to float value size (128 bytes) if no implicit
5215 // float attribute is set.
5216 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
5217 Attribute::NoImplicitFloat);
5218
5219 if (NoFloat) {
5220 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5221 return (MemVT.getSizeInBits() <= MaxIntSize);
5222 }
5223 // Make sure we don't merge greater than our preferred vector
5224 // width.
5225 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5226 return false;
5227 return true;
5228}
5229
5230bool X86TargetLowering::isCtlzFast() const {
5231 return Subtarget.hasFastLZCNT();
5232}
5233
5234bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5235 const Instruction &AndI) const {
5236 return true;
5237}
5238
5239bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5240 EVT VT = Y.getValueType();
5241
5242 if (VT.isVector())
5243 return false;
5244
5245 if (!Subtarget.hasBMI())
5246 return false;
5247
5248 // There are only 32-bit and 64-bit forms for 'andn'.
5249 if (VT != MVT::i32 && VT != MVT::i64)
5250 return false;
5251
5252 return !isa<ConstantSDNode>(Y);
5253}
5254
5255bool X86TargetLowering::hasAndNot(SDValue Y) const {
5256 EVT VT = Y.getValueType();
5257
5258 if (!VT.isVector())
5259 return hasAndNotCompare(Y);
5260
5261 // Vector.
5262
5263 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5264 return false;
5265
5266 if (VT == MVT::v4i32)
5267 return true;
5268
5269 return Subtarget.hasSSE2();
5270}
5271
5272bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5273 return X.getValueType().isScalarInteger(); // 'bt'
5274}
5275
5276bool X86TargetLowering::
5277 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5278 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5279 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5280 SelectionDAG &DAG) const {
5281 // Does baseline recommend not to perform the fold by default?
5282 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5283 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5284 return false;
5285 // For scalars this transform is always beneficial.
5286 if (X.getValueType().isScalarInteger())
5287 return true;
5288 // If all the shift amounts are identical, then transform is beneficial even
5289 // with rudimentary SSE2 shifts.
5290 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5291 return true;
5292 // If we have AVX2 with it's powerful shift operations, then it's also good.
5293 if (Subtarget.hasAVX2())
5294 return true;
5295 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5296 return NewShiftOpcode == ISD::SHL;
5297}
5298
5299bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5300 const SDNode *N, CombineLevel Level) const {
5301 assert(((N->getOpcode() == ISD::SHL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5305, __PRETTY_FUNCTION__))
5302 N->getOperand(0).getOpcode() == ISD::SRL) ||((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5305, __PRETTY_FUNCTION__))
5303 (N->getOpcode() == ISD::SRL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5305, __PRETTY_FUNCTION__))
5304 N->getOperand(0).getOpcode() == ISD::SHL)) &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5305, __PRETTY_FUNCTION__))
5305 "Expected shift-shift mask")((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5305, __PRETTY_FUNCTION__))
;
5306 EVT VT = N->getValueType(0);
5307 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5308 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5309 // Only fold if the shift values are equal - so it folds to AND.
5310 // TODO - we should fold if either is a non-uniform vector but we don't do
5311 // the fold for non-splats yet.
5312 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5313 }
5314 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5315}
5316
5317bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5318 EVT VT = Y.getValueType();
5319
5320 // For vectors, we don't have a preference, but we probably want a mask.
5321 if (VT.isVector())
5322 return false;
5323
5324 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5325 if (VT == MVT::i64 && !Subtarget.is64Bit())
5326 return false;
5327
5328 return true;
5329}
5330
5331bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5332 SDNode *N) const {
5333 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5334 !Subtarget.isOSWindows())
5335 return false;
5336 return true;
5337}
5338
5339bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5340 // Any legal vector type can be splatted more efficiently than
5341 // loading/spilling from memory.
5342 return isTypeLegal(VT);
5343}
5344
5345MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5346 MVT VT = MVT::getIntegerVT(NumBits);
5347 if (isTypeLegal(VT))
5348 return VT;
5349
5350 // PMOVMSKB can handle this.
5351 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5352 return MVT::v16i8;
5353
5354 // VPMOVMSKB can handle this.
5355 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5356 return MVT::v32i8;
5357
5358 // TODO: Allow 64-bit type for 32-bit target.
5359 // TODO: 512-bit types should be allowed, but make sure that those
5360 // cases are handled in combineVectorSizedSetCCEquality().
5361
5362 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5363}
5364
5365/// Val is the undef sentinel value or equal to the specified value.
5366static bool isUndefOrEqual(int Val, int CmpVal) {
5367 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5368}
5369
5370/// Val is either the undef or zero sentinel value.
5371static bool isUndefOrZero(int Val) {
5372 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5373}
5374
5375/// Return true if every element in Mask, beginning from position Pos and ending
5376/// in Pos+Size is the undef sentinel value.
5377static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5378 return llvm::all_of(Mask.slice(Pos, Size),
5379 [](int M) { return M == SM_SentinelUndef; });
5380}
5381
5382/// Return true if the mask creates a vector whose lower half is undefined.
5383static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5384 unsigned NumElts = Mask.size();
5385 return isUndefInRange(Mask, 0, NumElts / 2);
5386}
5387
5388/// Return true if the mask creates a vector whose upper half is undefined.
5389static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5390 unsigned NumElts = Mask.size();
5391 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5392}
5393
/// Return true if Val falls within the half-open range [Low, Hi): Low is
/// included, Hi is excluded.
static bool isInRange(int Val, int Low, int Hi) {
  if (Val < Low)
    return false;
  return Val < Hi;
}
5398
5399/// Return true if the value of any element in Mask falls within the specified
5400/// range (L, H].
5401static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5402 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5403}
5404
5405/// Return true if Val is undef or if its value falls within the
5406/// specified range (L, H].
5407static bool isUndefOrInRange(int Val, int Low, int Hi) {
5408 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5409}
5410
5411/// Return true if every element in Mask is undef or if its value
5412/// falls within the specified range (L, H].
5413static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5414 return llvm::all_of(
5415 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5416}
5417
5418/// Return true if Val is undef, zero or if its value falls within the
5419/// specified range (L, H].
5420static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5421 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5422}
5423
5424/// Return true if every element in Mask is undef, zero or if its value
5425/// falls within the specified range (L, H].
5426static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5427 return llvm::all_of(
5428 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5429}
5430
5431/// Return true if every element in Mask, beginning
5432/// from position Pos and ending in Pos + Size, falls within the specified
5433/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5434static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5435 unsigned Size, int Low, int Step = 1) {
5436 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5437 if (!isUndefOrEqual(Mask[i], Low))
5438 return false;
5439 return true;
5440}
5441
5442/// Return true if every element in Mask, beginning
5443/// from position Pos and ending in Pos+Size, falls within the specified
5444/// sequential range (Low, Low+Size], or is undef or is zero.
5445static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5446 unsigned Size, int Low,
5447 int Step = 1) {
5448 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5449 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5450 return false;
5451 return true;
5452}
5453
5454/// Return true if every element in Mask, beginning
5455/// from position Pos and ending in Pos+Size is undef or is zero.
5456static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5457 unsigned Size) {
5458 return llvm::all_of(Mask.slice(Pos, Size),
5459 [](int M) { return isUndefOrZero(M); });
5460}
5461
5462/// Helper function to test whether a shuffle mask could be
5463/// simplified by widening the elements being shuffled.
5464///
5465/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5466/// leaves it in an unspecified state.
5467///
5468/// NOTE: This must handle normal vector shuffle masks and *target* vector
5469/// shuffle masks. The latter have the special property of a '-2' representing
5470/// a zero-ed lane of a vector.
5471static bool canWidenShuffleElements(ArrayRef<int> Mask,
5472 SmallVectorImpl<int> &WidenedMask) {
5473 WidenedMask.assign(Mask.size() / 2, 0);
5474 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5475 int M0 = Mask[i];
5476 int M1 = Mask[i + 1];
5477
5478 // If both elements are undef, its trivial.
5479 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5480 WidenedMask[i / 2] = SM_SentinelUndef;
5481 continue;
5482 }
5483
5484 // Check for an undef mask and a mask value properly aligned to fit with
5485 // a pair of values. If we find such a case, use the non-undef mask's value.
5486 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5487 WidenedMask[i / 2] = M1 / 2;
5488 continue;
5489 }
5490 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5491 WidenedMask[i / 2] = M0 / 2;
5492 continue;
5493 }
5494
5495 // When zeroing, we need to spread the zeroing across both lanes to widen.
5496 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5497 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5498 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5499 WidenedMask[i / 2] = SM_SentinelZero;
5500 continue;
5501 }
5502 return false;
5503 }
5504
5505 // Finally check if the two mask values are adjacent and aligned with
5506 // a pair.
5507 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5508 WidenedMask[i / 2] = M0 / 2;
5509 continue;
5510 }
5511
5512 // Otherwise we can't safely widen the elements used in this shuffle.
5513 return false;
5514 }
5515 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5516, __PRETTY_FUNCTION__))
5516 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5516, __PRETTY_FUNCTION__))
;
5517
5518 return true;
5519}
5520
5521static bool canWidenShuffleElements(ArrayRef<int> Mask,
5522 const APInt &Zeroable,
5523 bool V2IsZero,
5524 SmallVectorImpl<int> &WidenedMask) {
5525 // Create an alternative mask with info about zeroable elements.
5526 // Here we do not set undef elements as zeroable.
5527 SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
5528 if (V2IsZero) {
5529 assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!")((!Zeroable.isNullValue() && "V2's non-undef elements are used?!"
) ? static_cast<void> (0) : __assert_fail ("!Zeroable.isNullValue() && \"V2's non-undef elements are used?!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5529, __PRETTY_FUNCTION__))
;
5530 for (int i = 0, Size = Mask.size(); i != Size; ++i)
5531 if (Mask[i] != SM_SentinelUndef && Zeroable[i])
5532 ZeroableMask[i] = SM_SentinelZero;
5533 }
5534 return canWidenShuffleElements(ZeroableMask, WidenedMask);
5535}
5536
5537static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5538 SmallVector<int, 32> WidenedMask;
5539 return canWidenShuffleElements(Mask, WidenedMask);
5540}
5541
5542/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5543bool X86::isZeroNode(SDValue Elt) {
5544 return isNullConstant(Elt) || isNullFPConstant(Elt);
5545}
5546
5547// Build a vector of constants.
5548// Use an UNDEF node if MaskElt == -1.
5549// Split 64-bit constants in the 32-bit mode.
5550static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5551 const SDLoc &dl, bool IsMask = false) {
5552
5553 SmallVector<SDValue, 32> Ops;
5554 bool Split = false;
5555
5556 MVT ConstVecVT = VT;
5557 unsigned NumElts = VT.getVectorNumElements();
5558 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5559 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5560 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5561 Split = true;
5562 }
5563
5564 MVT EltVT = ConstVecVT.getVectorElementType();
5565 for (unsigned i = 0; i < NumElts; ++i) {
5566 bool IsUndef = Values[i] < 0 && IsMask;
5567 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5568 DAG.getConstant(Values[i], dl, EltVT);
5569 Ops.push_back(OpNode);
5570 if (Split)
5571 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5572 DAG.getConstant(0, dl, EltVT));
5573 }
5574 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5575 if (Split)
5576 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5577 return ConstsNode;
5578}
5579
5580static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5581 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5582 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5583, __PRETTY_FUNCTION__))
5583 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5583, __PRETTY_FUNCTION__))
;
5584 SmallVector<SDValue, 32> Ops;
5585 bool Split = false;
5586
5587 MVT ConstVecVT = VT;
5588 unsigned NumElts = VT.getVectorNumElements();
5589 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5590 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5591 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5592 Split = true;
5593 }
5594
5595 MVT EltVT = ConstVecVT.getVectorElementType();
5596 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5597 if (Undefs[i]) {
5598 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5599 continue;
5600 }
5601 const APInt &V = Bits[i];
5602 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5602, __PRETTY_FUNCTION__))
;
5603 if (Split) {
5604 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5605 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5606 } else if (EltVT == MVT::f32) {
5607 APFloat FV(APFloat::IEEEsingle(), V);
5608 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5609 } else if (EltVT == MVT::f64) {
5610 APFloat FV(APFloat::IEEEdouble(), V);
5611 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5612 } else {
5613 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5614 }
5615 }
5616
5617 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5618 return DAG.getBitcast(VT, ConstsNode);
5619}
5620
5621/// Returns a vector of specified type with all zero elements.
5622static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5623 SelectionDAG &DAG, const SDLoc &dl) {
5624 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5626, __PRETTY_FUNCTION__))
5625 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5626, __PRETTY_FUNCTION__))
5626 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5626, __PRETTY_FUNCTION__))
;
5627
5628 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5629 // type. This ensures they get CSE'd. But if the integer type is not
5630 // available, use a floating-point +0.0 instead.
5631 SDValue Vec;
5632 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5633 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5634 } else if (VT.isFloatingPoint()) {
5635 Vec = DAG.getConstantFP(+0.0, dl, VT);
5636 } else if (VT.getVectorElementType() == MVT::i1) {
5637 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5638, __PRETTY_FUNCTION__))
5638 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5638, __PRETTY_FUNCTION__))
;
5639 Vec = DAG.getConstant(0, dl, VT);
5640 } else {
5641 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5642 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5643 }
5644 return DAG.getBitcast(VT, Vec);
5645}
5646
5647static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5648 const SDLoc &dl, unsigned vectorWidth) {
5649 EVT VT = Vec.getValueType();
5650 EVT ElVT = VT.getVectorElementType();
5651 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5652 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5653 VT.getVectorNumElements()/Factor);
5654
5655 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5656 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5657 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5657, __PRETTY_FUNCTION__))
;
5658
5659 // This is the index of the first element of the vectorWidth-bit chunk
5660 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5661 IdxVal &= ~(ElemsPerChunk - 1);
5662
5663 // If the input is a buildvector just emit a smaller one.
5664 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5665 return DAG.getBuildVector(ResultVT, dl,
5666 Vec->ops().slice(IdxVal, ElemsPerChunk));
5667
5668 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5669 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5670}
5671
5672/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5673/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5674/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5675/// instructions or a simple subregister reference. Idx is an index in the
5676/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5677/// lowering EXTRACT_VECTOR_ELT operations easier.
5678static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5679 SelectionDAG &DAG, const SDLoc &dl) {
5680 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5681, __PRETTY_FUNCTION__))
5681 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5681, __PRETTY_FUNCTION__))
;
5682 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5683}
5684
5685/// Generate a DAG to grab 256-bits from a 512-bit vector.
5686static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5687 SelectionDAG &DAG, const SDLoc &dl) {
5688 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5688, __PRETTY_FUNCTION__))
;
5689 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5690}
5691
5692static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5693 SelectionDAG &DAG, const SDLoc &dl,
5694 unsigned vectorWidth) {
5695 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5696, __PRETTY_FUNCTION__))
5696 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5696, __PRETTY_FUNCTION__))
;
5697 // Inserting UNDEF is Result
5698 if (Vec.isUndef())
5699 return Result;
5700 EVT VT = Vec.getValueType();
5701 EVT ElVT = VT.getVectorElementType();
5702 EVT ResultVT = Result.getValueType();
5703
5704 // Insert the relevant vectorWidth bits.
5705 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5706 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5706, __PRETTY_FUNCTION__))
;
5707
5708 // This is the index of the first element of the vectorWidth-bit chunk
5709 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5710 IdxVal &= ~(ElemsPerChunk - 1);
5711
5712 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5713 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5714}
5715
5716/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5717/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5718/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5719/// simple superregister reference. Idx is an index in the 128 bits
5720/// we want. It need not be aligned to a 128-bit boundary. That makes
5721/// lowering INSERT_VECTOR_ELT operations easier.
5722static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5723 SelectionDAG &DAG, const SDLoc &dl) {
5724 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5724, __PRETTY_FUNCTION__))
;
5725 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5726}
5727
5728/// Widen a vector to a larger size with the same scalar type, with the new
5729/// elements either zero or undef.
5730static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5731 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5732 const SDLoc &dl) {
5733 assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5735, __PRETTY_FUNCTION__))
5734 Vec.getValueType().getScalarType() == VT.getScalarType() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5735, __PRETTY_FUNCTION__))
5735 "Unsupported vector widening type")((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5735, __PRETTY_FUNCTION__))
;
5736 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5737 : DAG.getUNDEF(VT);
5738 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5739 DAG.getIntPtrConstant(0, dl));
5740}
5741
5742/// Widen a vector to a larger size with the same scalar type, with the new
5743/// elements either zero or undef.
5744static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
5745 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5746 const SDLoc &dl, unsigned WideSizeInBits) {
5747 assert(Vec.getValueSizeInBits() < WideSizeInBits &&((Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits
% Vec.getScalarValueSizeInBits()) == 0 && "Unsupported vector widening type"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5749, __PRETTY_FUNCTION__))
5748 (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&((Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits
% Vec.getScalarValueSizeInBits()) == 0 && "Unsupported vector widening type"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5749, __PRETTY_FUNCTION__))
5749 "Unsupported vector widening type")((Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits
% Vec.getScalarValueSizeInBits()) == 0 && "Unsupported vector widening type"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5749, __PRETTY_FUNCTION__))
;
5750 unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits();
5751 MVT SVT = Vec.getSimpleValueType().getScalarType();
5752 MVT VT = MVT::getVectorVT(SVT, WideNumElts);
5753 return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
5754}
5755
5756// Helper function to collect subvector ops that are concatenated together,
5757// either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series.
5758// The subvectors in Ops are guaranteed to be the same type.
5759static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
5760 assert(Ops.empty() && "Expected an empty ops vector")((Ops.empty() && "Expected an empty ops vector") ? static_cast
<void> (0) : __assert_fail ("Ops.empty() && \"Expected an empty ops vector\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5760, __PRETTY_FUNCTION__))
;
5761
5762 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
5763 Ops.append(N->op_begin(), N->op_end());
5764 return true;
5765 }
5766
5767 if (N->getOpcode() == ISD::INSERT_SUBVECTOR &&
5768 isa<ConstantSDNode>(N->getOperand(2))) {
5769 SDValue Src = N->getOperand(0);
5770 SDValue Sub = N->getOperand(1);
5771 const APInt &Idx = N->getConstantOperandAPInt(2);
5772 EVT VT = Src.getValueType();
5773 EVT SubVT = Sub.getValueType();
5774
5775 // TODO - Handle more general insert_subvector chains.
5776 if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
5777 Idx == (VT.getVectorNumElements() / 2) &&
5778 Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
5779 Src.getOperand(1).getValueType() == SubVT &&
5780 isNullConstant(Src.getOperand(2))) {
5781 Ops.push_back(Src.getOperand(1));
5782 Ops.push_back(Sub);
5783 return true;
5784 }
5785 }
5786
5787 return false;
5788}
5789
5790// Helper for splitting operands of an operation to legal target size and
5791// apply a function on each part.
5792// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5793// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
5794// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5795// The argument Builder is a function that will be applied on each split part:
5796// SDValue Builder(SelectionDAG&G, SDLoc, ArrayRef<SDValue>)
5797template <typename F>
5798SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5799 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
5800 F Builder, bool CheckBWI = true) {
5801 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2")((Subtarget.hasSSE2() && "Target assumed to support at least SSE2"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasSSE2() && \"Target assumed to support at least SSE2\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5801, __PRETTY_FUNCTION__))
;
5802 unsigned NumSubs = 1;
5803 if ((CheckBWI && Subtarget.useBWIRegs()) ||
5804 (!CheckBWI && Subtarget.useAVX512Regs())) {
5805 if (VT.getSizeInBits() > 512) {
5806 NumSubs = VT.getSizeInBits() / 512;
5807 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 512) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 512) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5807, __PRETTY_FUNCTION__))
;
5808 }
5809 } else if (Subtarget.hasAVX2()) {
5810 if (VT.getSizeInBits() > 256) {
5811 NumSubs = VT.getSizeInBits() / 256;
5812 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 256) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 256) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5812, __PRETTY_FUNCTION__))
;
5813 }
5814 } else {
5815 if (VT.getSizeInBits() > 128) {
5816 NumSubs = VT.getSizeInBits() / 128;
5817 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 128) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 128) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5817, __PRETTY_FUNCTION__))
;
5818 }
5819 }
5820
5821 if (NumSubs == 1)
5822 return Builder(DAG, DL, Ops);
5823
5824 SmallVector<SDValue, 4> Subs;
5825 for (unsigned i = 0; i != NumSubs; ++i) {
5826 SmallVector<SDValue, 2> SubOps;
5827 for (SDValue Op : Ops) {
5828 EVT OpVT = Op.getValueType();
5829 unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
5830 unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
5831 SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
5832 }
5833 Subs.push_back(Builder(DAG, DL, SubOps));
5834 }
5835 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5836}
5837
5838/// Insert i1-subvector to i1-vector.
5839static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5840 const X86Subtarget &Subtarget) {
5841
5842 SDLoc dl(Op);
5843 SDValue Vec = Op.getOperand(0);
5844 SDValue SubVec = Op.getOperand(1);
5845 SDValue Idx = Op.getOperand(2);
5846
5847 if (!isa<ConstantSDNode>(Idx))
5848 return SDValue();
5849
5850 // Inserting undef is a nop. We can just return the original vector.
5851 if (SubVec.isUndef())
5852 return Vec;
5853
5854 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5855 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5856 return Op;
5857
5858 MVT OpVT = Op.getSimpleValueType();
5859 unsigned NumElems = OpVT.getVectorNumElements();
5860
5861 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5862
5863 // Extend to natively supported kshift.
5864 MVT WideOpVT = OpVT;
5865 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5866 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5867
5868 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5869 // if necessary.
5870 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5871 // May need to promote to a legal type.
5872 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5873 DAG.getConstant(0, dl, WideOpVT),
5874 SubVec, Idx);
5875 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5876 }
5877
5878 MVT SubVecVT = SubVec.getSimpleValueType();
5879 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5880
5881 assert(IdxVal + SubVecNumElems <= NumElems &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5883, __PRETTY_FUNCTION__))
5882 IdxVal % SubVecVT.getSizeInBits() == 0 &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-11~++20200224111112+6e561d1c94e/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5883, __PRETTY_FUNCTION__))
5883 "Unexpected index value in INSERT_SUBVECTOR")((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal