Bug Summary

File: build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 16948, column 21
The result of the '/' expression is undefined
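
This diagnostic is typically emitted by the analyzer's core checkers when an operand of '/' may hold an uninitialized (garbage) value along some execution path. As a rough illustration only, here is a minimal hypothetical sketch of the pattern — the names are invented and this is not the code at line 16948, which lies outside the excerpt below:

    // Hypothetical sketch (not the LLVM code at line 16948): 'Scale' is never
    // initialized on the UseWide == false path, so the analyzer reports
    // "The result of the '/' expression is undefined" for the division.
    unsigned pickScale(bool UseWide) {
      unsigned Scale;          // not assigned on every path
      if (UseWide)
        Scale = 2;
      return 128 / Scale;      // divisor may be garbage here
    }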

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/X86 -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/X86 -I include -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-10-03-140002-15933-1 -x c++ /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/X86/X86ISelLowering.cpp
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/IntrinsicLowering.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineJumpTableInfo.h"
39#include "llvm/CodeGen/MachineLoopInfo.h"
40#include "llvm/CodeGen/MachineModuleInfo.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/TargetLowering.h"
43#include "llvm/CodeGen/WinEHFuncInfo.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DerivedTypes.h"
47#include "llvm/IR/DiagnosticInfo.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalVariable.h"
51#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Instructions.h"
53#include "llvm/IR/Intrinsics.h"
54#include "llvm/IR/PatternMatch.h"
55#include "llvm/MC/MCAsmInfo.h"
56#include "llvm/MC/MCContext.h"
57#include "llvm/MC/MCExpr.h"
58#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/KnownBits.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Target/TargetOptions.h"
65#include <algorithm>
66#include <bitset>
67#include <cctype>
68#include <numeric>
69using namespace llvm;
70
71#define DEBUG_TYPE "x86-isel"
72
73STATISTIC(NumTailCalls, "Number of tail calls");
74
75static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
76 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
77 cl::desc(
78 "Sets the preferable loop alignment for experiments (as log2 bytes) "
79 "for innermost loops only. If specified, this option overrides "
80 "alignment set by x86-experimental-pref-loop-alignment."),
81 cl::Hidden);
82
83static cl::opt<bool> MulConstantOptimization(
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden);
88
89static cl::opt<bool> ExperimentalUnorderedISEL(
90 "x86-experimental-unordered-atomic-isel", cl::init(false),
91 cl::desc("Use LoadSDNode and StoreSDNode instead of "
92 "AtomicSDNode for unordered atomic loads and "
93 "stores respectively."),
94 cl::Hidden);
95
96/// Call this when the user attempts to do something unsupported, like
97/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
98/// report_fatal_error, so calling code should attempt to recover without
99/// crashing.
100static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
101 const char *Msg) {
102 MachineFunction &MF = DAG.getMachineFunction();
103 DAG.getContext()->diagnose(
104 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
105}
106
107X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
108 const X86Subtarget &STI)
109 : TargetLowering(TM), Subtarget(STI) {
110 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
111 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
112
113 // Set up the TargetLowering object.
114
115 // X86 is weird. It always uses i8 for shift amounts and setcc results.
116 setBooleanContents(ZeroOrOneBooleanContent);
117 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
118 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
119
120 // For 64-bit, since we have so many registers, use the ILP scheduler.
121 // For 32-bit, use the register pressure specific scheduling.
122 // For Atom, always use ILP scheduling.
123 if (Subtarget.isAtom())
124 setSchedulingPreference(Sched::ILP);
125 else if (Subtarget.is64Bit())
126 setSchedulingPreference(Sched::ILP);
127 else
128 setSchedulingPreference(Sched::RegPressure);
129 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
130 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
131
132 // Bypass expensive divides and use cheaper ones.
133 if (TM.getOptLevel() >= CodeGenOpt::Default) {
134 if (Subtarget.hasSlowDivide32())
135 addBypassSlowDiv(32, 8);
136 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
137 addBypassSlowDiv(64, 32);
138 }
139
140 // Setup Windows compiler runtime calls.
141 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
142 static const struct {
143 const RTLIB::Libcall Op;
144 const char * const Name;
145 const CallingConv::ID CC;
146 } LibraryCalls[] = {
147 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
148 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
149 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
150 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
151 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
152 };
153
154 for (const auto &LC : LibraryCalls) {
155 setLibcallName(LC.Op, LC.Name);
156 setLibcallCallingConv(LC.Op, LC.CC);
157 }
158 }
159
160 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
161 // MSVCRT doesn't have powi; fall back to pow
162 setLibcallName(RTLIB::POWI_F32, nullptr);
163 setLibcallName(RTLIB::POWI_F64, nullptr);
164 }
165
166 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
167 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
168 // FIXME: Should we be limiting the atomic size on other configs? Default is
169 // 1024.
170 if (!Subtarget.canUseCMPXCHG8B())
171 setMaxAtomicSizeInBitsSupported(32);
172
173 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
174
175 // Set up the register classes.
176 addRegisterClass(MVT::i8, &X86::GR8RegClass);
177 addRegisterClass(MVT::i16, &X86::GR16RegClass);
178 addRegisterClass(MVT::i32, &X86::GR32RegClass);
179 if (Subtarget.is64Bit())
180 addRegisterClass(MVT::i64, &X86::GR64RegClass);
181
182 for (MVT VT : MVT::integer_valuetypes())
183 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
184
185 // We don't accept any truncstore of integer registers.
186 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
187 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
188 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
189 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
190 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
191 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
192
193 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
194
195 // SETOEQ and SETUNE require checking two conditions.
196 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
197 setCondCodeAction(ISD::SETOEQ, VT, Expand);
198 setCondCodeAction(ISD::SETUNE, VT, Expand);
199 }
200
201 // Integer absolute.
202 if (Subtarget.canUseCMOV()) {
203 setOperationAction(ISD::ABS , MVT::i16 , Custom);
204 setOperationAction(ISD::ABS , MVT::i32 , Custom);
205 if (Subtarget.is64Bit())
206 setOperationAction(ISD::ABS , MVT::i64 , Custom);
207 }
208
209 // Signed saturation subtraction.
210 setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
211 setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
212 setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
213 if (Subtarget.is64Bit())
214 setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
215
216 // Funnel shifts.
217 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
218 // For slow shld targets we only lower for code size.
219 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
220
221 setOperationAction(ShiftOp , MVT::i8 , Custom);
222 setOperationAction(ShiftOp , MVT::i16 , Custom);
223 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
224 if (Subtarget.is64Bit())
225 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
226 }
227
228 if (!Subtarget.useSoftFloat()) {
229 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
230 // operation.
231 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
232 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
233 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
234 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
235 // We have an algorithm for SSE2, and we turn this into a 64-bit
236 // FILD or VCVTUSI2SS/SD for other targets.
237 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
238 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
239 // We have an algorithm for SSE2->double, and we turn this into a
240 // 64-bit FILD followed by conditional FADD for other targets.
241 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
242 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
243
244 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
245 // this operation.
246 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
247 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
248 // SSE has no i16 to fp conversion, only i32. We promote in the handler
249 // to allow f80 to use i16 and f64 to use i16 with sse1 only
250 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
251 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
252 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
253 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
254 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
255 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
256 // are Legal, f80 is custom lowered.
257 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
258 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
259
260 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
261 // this operation.
262 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
263 // FIXME: This doesn't generate invalid exception when it should. PR44019.
264 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
265 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
266 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
267 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
268 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
269 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
270 // are Legal, f80 is custom lowered.
271 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
272 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
273
274 // Handle FP_TO_UINT by promoting the destination to a larger signed
275 // conversion.
276 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
277 // FIXME: This doesn't generate invalid exception when it should. PR44019.
278 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
279 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
280 // FIXME: This doesn't generate invalid exception when it should. PR44019.
281 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
282 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
283 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
284 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
285 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
286
287 setOperationAction(ISD::LRINT, MVT::f32, Custom);
288 setOperationAction(ISD::LRINT, MVT::f64, Custom);
289 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
290 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
291
292 if (!Subtarget.is64Bit()) {
293 setOperationAction(ISD::LRINT, MVT::i64, Custom);
294 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
295 }
296 }
297
298 if (Subtarget.hasSSE2()) {
299 // Custom lowering for saturating float to int conversions.
300 // We handle promotion to larger result types manually.
301 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
302 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
303 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
304 }
305 if (Subtarget.is64Bit()) {
306 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
307 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
308 }
309 }
310
311 // Handle address space casts between mixed sized pointers.
312 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
313 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
314
315 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
316 if (!Subtarget.hasSSE2()) {
317 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
318 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
319 if (Subtarget.is64Bit()) {
320 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
321 // Without SSE, i64->f64 goes through memory.
322 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
323 }
324 } else if (!Subtarget.is64Bit())
325 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
326
327 // Scalar integer divide and remainder are lowered to use operations that
328 // produce two results, to match the available instructions. This exposes
329 // the two-result form to trivial CSE, which is able to combine x/y and x%y
330 // into a single instruction.
331 //
332 // Scalar integer multiply-high is also lowered to use two-result
333 // operations, to match the available instructions. However, plain multiply
334 // (low) operations are left as Legal, as there are single-result
335 // instructions for this in x86. Using the two-result multiply instructions
336 // when both high and low results are needed must be arranged by dagcombine.
337 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
338 setOperationAction(ISD::MULHS, VT, Expand);
339 setOperationAction(ISD::MULHU, VT, Expand);
340 setOperationAction(ISD::SDIV, VT, Expand);
341 setOperationAction(ISD::UDIV, VT, Expand);
342 setOperationAction(ISD::SREM, VT, Expand);
343 setOperationAction(ISD::UREM, VT, Expand);
344 }
345
346 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
347 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
348 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
349 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
350 setOperationAction(ISD::BR_CC, VT, Expand);
351 setOperationAction(ISD::SELECT_CC, VT, Expand);
352 }
353 if (Subtarget.is64Bit())
354 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
355 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
356 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
357 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
358
359 setOperationAction(ISD::FREM , MVT::f32 , Expand);
360 setOperationAction(ISD::FREM , MVT::f64 , Expand);
361 setOperationAction(ISD::FREM , MVT::f80 , Expand);
362 setOperationAction(ISD::FREM , MVT::f128 , Expand);
363
364 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
365 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
366 setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
367 }
368
369 // Promote the i8 variants and force them on up to i32 which has a shorter
370 // encoding.
371 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
372 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
373 // Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
374 // a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
375 // promote that too.
376 setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
377 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32);
378
379 if (!Subtarget.hasBMI()) {
380 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
381 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
382 if (Subtarget.is64Bit()) {
383 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
384 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
385 }
386 }
387
388 if (Subtarget.hasLZCNT()) {
389 // When promoting the i8 variants, force them to i32 for a shorter
390 // encoding.
391 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
392 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
393 } else {
394 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
395 if (VT == MVT::i64 && !Subtarget.is64Bit())
396 continue;
397 setOperationAction(ISD::CTLZ , VT, Custom);
398 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
399 }
400 }
401
402 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
403 ISD::STRICT_FP_TO_FP16}) {
404 // Special handling for half-precision floating point conversions.
405 // If we don't have F16C support, then lower half float conversions
406 // into library calls.
407 setOperationAction(
408 Op, MVT::f32,
409 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
410 // There's never any support for operations beyond MVT::f32.
411 setOperationAction(Op, MVT::f64, Expand);
412 setOperationAction(Op, MVT::f80, Expand);
413 setOperationAction(Op, MVT::f128, Expand);
414 }
415
416 for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
417 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
418 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
419 setTruncStoreAction(VT, MVT::f16, Expand);
420 setTruncStoreAction(VT, MVT::bf16, Expand);
421
422 setOperationAction(ISD::BF16_TO_FP, VT, Expand);
423 setOperationAction(ISD::FP_TO_BF16, VT, Custom);
424 }
425
426 setOperationAction(ISD::PARITY, MVT::i8, Custom);
427 setOperationAction(ISD::PARITY, MVT::i16, Custom);
428 setOperationAction(ISD::PARITY, MVT::i32, Custom);
429 if (Subtarget.is64Bit())
430 setOperationAction(ISD::PARITY, MVT::i64, Custom);
431 if (Subtarget.hasPOPCNT()) {
432 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
433 // popcntw is longer to encode than popcntl and also has a false dependency
434 // on the dest that popcntl hasn't had since Cannon Lake.
435 setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
436 } else {
437 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
438 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
439 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
440 if (Subtarget.is64Bit())
441 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
442 else
443 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
444 }
445
446 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
447
448 if (!Subtarget.hasMOVBE())
449 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
450
451 // X86 wants to expand cmov itself.
452 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
453 setOperationAction(ISD::SELECT, VT, Custom);
454 setOperationAction(ISD::SETCC, VT, Custom);
455 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
456 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
457 }
458 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
459 if (VT == MVT::i64 && !Subtarget.is64Bit())
460 continue;
461 setOperationAction(ISD::SELECT, VT, Custom);
462 setOperationAction(ISD::SETCC, VT, Custom);
463 }
464
465 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
466 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
467 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
468
469 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
470 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
471 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
472 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
473 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
474 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
475 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
476 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
477
478 // Darwin ABI issue.
479 for (auto VT : { MVT::i32, MVT::i64 }) {
480 if (VT == MVT::i64 && !Subtarget.is64Bit())
481 continue;
482 setOperationAction(ISD::ConstantPool , VT, Custom);
483 setOperationAction(ISD::JumpTable , VT, Custom);
484 setOperationAction(ISD::GlobalAddress , VT, Custom);
485 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
486 setOperationAction(ISD::ExternalSymbol , VT, Custom);
487 setOperationAction(ISD::BlockAddress , VT, Custom);
488 }
489
490 // 64-bit shl, sra, srl (iff 32-bit x86)
491 for (auto VT : { MVT::i32, MVT::i64 }) {
492 if (VT == MVT::i64 && !Subtarget.is64Bit())
493 continue;
494 setOperationAction(ISD::SHL_PARTS, VT, Custom);
495 setOperationAction(ISD::SRA_PARTS, VT, Custom);
496 setOperationAction(ISD::SRL_PARTS, VT, Custom);
497 }
498
499 if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
500 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
501
502 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
503
504 // Expand certain atomics
505 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
506 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
507 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
508 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
509 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
510 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
511 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
512 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
513 }
514
515 if (!Subtarget.is64Bit())
516 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
517
518 if (Subtarget.canUseCMPXCHG16B())
519 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
520
521 // FIXME - use subtarget debug flags
522 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
523 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
524 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
525 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
526 }
527
528 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
529 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
530
531 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
532 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
533
534 setOperationAction(ISD::TRAP, MVT::Other, Legal);
535 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
536 if (Subtarget.isTargetPS())
537 setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
538 else
539 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
540
541 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
542 setOperationAction(ISD::VASTART , MVT::Other, Custom);
543 setOperationAction(ISD::VAEND , MVT::Other, Expand);
544 bool Is64Bit = Subtarget.is64Bit();
545 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
546 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
547
548 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
549 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
550
551 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
552
553 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
554 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
555 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
556
557 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
558
559 auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
560 setOperationAction(ISD::FABS, VT, Action);
561 setOperationAction(ISD::FNEG, VT, Action);
562 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
563 setOperationAction(ISD::FREM, VT, Action);
564 setOperationAction(ISD::FMA, VT, Action);
565 setOperationAction(ISD::FMINNUM, VT, Action);
566 setOperationAction(ISD::FMAXNUM, VT, Action);
567 setOperationAction(ISD::FMINIMUM, VT, Action);
568 setOperationAction(ISD::FMAXIMUM, VT, Action);
569 setOperationAction(ISD::FSIN, VT, Action);
570 setOperationAction(ISD::FCOS, VT, Action);
571 setOperationAction(ISD::FSINCOS, VT, Action);
572 setOperationAction(ISD::FSQRT, VT, Action);
573 setOperationAction(ISD::FPOW, VT, Action);
574 setOperationAction(ISD::FLOG, VT, Action);
575 setOperationAction(ISD::FLOG2, VT, Action);
576 setOperationAction(ISD::FLOG10, VT, Action);
577 setOperationAction(ISD::FEXP, VT, Action);
578 setOperationAction(ISD::FEXP2, VT, Action);
579 setOperationAction(ISD::FCEIL, VT, Action);
580 setOperationAction(ISD::FFLOOR, VT, Action);
581 setOperationAction(ISD::FNEARBYINT, VT, Action);
582 setOperationAction(ISD::FRINT, VT, Action);
583 setOperationAction(ISD::BR_CC, VT, Action);
584 setOperationAction(ISD::SETCC, VT, Action);
585 setOperationAction(ISD::SELECT, VT, Custom);
586 setOperationAction(ISD::SELECT_CC, VT, Action);
587 setOperationAction(ISD::FROUND, VT, Action);
588 setOperationAction(ISD::FROUNDEVEN, VT, Action);
589 setOperationAction(ISD::FTRUNC, VT, Action);
590 };
591
592 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
593 // f16, f32 and f64 use SSE.
594 // Set up the FP register classes.
595 addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
596 : &X86::FR16RegClass);
597 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
598 : &X86::FR32RegClass);
599 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
600 : &X86::FR64RegClass);
601
602 // Disable f32->f64 extload as we can only generate this in one instruction
603 // under optsize. So it's easier to pattern match (fpext (load)) for that
604 // case instead of needing to emit 2 instructions for extload in the
605 // non-optsize case.
606 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
607
608 for (auto VT : { MVT::f32, MVT::f64 }) {
609 // Use ANDPD to simulate FABS.
610 setOperationAction(ISD::FABS, VT, Custom);
611
612 // Use XORP to simulate FNEG.
613 setOperationAction(ISD::FNEG, VT, Custom);
614
615 // Use ANDPD and ORPD to simulate FCOPYSIGN.
616 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
617
618 // These might be better off as horizontal vector ops.
619 setOperationAction(ISD::FADD, VT, Custom);
620 setOperationAction(ISD::FSUB, VT, Custom);
621
622 // We don't support sin/cos/fmod
623 setOperationAction(ISD::FSIN , VT, Expand);
624 setOperationAction(ISD::FCOS , VT, Expand);
625 setOperationAction(ISD::FSINCOS, VT, Expand);
626 }
627
628 // Half type will be promoted by default.
629 setF16Action(MVT::f16, Promote);
630 setOperationAction(ISD::FADD, MVT::f16, Promote);
631 setOperationAction(ISD::FSUB, MVT::f16, Promote);
632 setOperationAction(ISD::FMUL, MVT::f16, Promote);
633 setOperationAction(ISD::FDIV, MVT::f16, Promote);
634 setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall);
635 setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall);
636 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
637
638 setOperationAction(ISD::STRICT_FADD, MVT::f16, Promote);
639 setOperationAction(ISD::STRICT_FSUB, MVT::f16, Promote);
640 setOperationAction(ISD::STRICT_FMUL, MVT::f16, Promote);
641 setOperationAction(ISD::STRICT_FDIV, MVT::f16, Promote);
642 setOperationAction(ISD::STRICT_FMA, MVT::f16, Promote);
643 setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Promote);
644 setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Promote);
645 setOperationAction(ISD::STRICT_FMINIMUM, MVT::f16, Promote);
646 setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote);
647 setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote);
648 setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote);
649 setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote);
650 setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote);
651 setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote);
652 setOperationAction(ISD::STRICT_FEXP, MVT::f16, Promote);
653 setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote);
654 setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote);
655 setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote);
656 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote);
657 setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote);
658 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote);
659 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote);
660 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
661 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
662 setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
663 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall);
664 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, LibCall);
665 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
666
667 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
668 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
669
670 // Lower this to MOVMSK plus an AND.
671 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
672 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
673
674 } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
675 (UseX87 || Is64Bit)) {
676 // Use SSE for f32, x87 for f64.
677 // Set up the FP register classes.
678 addRegisterClass(MVT::f32, &X86::FR32RegClass);
679 if (UseX87)
680 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
681
682 // Use ANDPS to simulate FABS.
683 setOperationAction(ISD::FABS , MVT::f32, Custom);
684
685 // Use XORP to simulate FNEG.
686 setOperationAction(ISD::FNEG , MVT::f32, Custom);
687
688 if (UseX87)
689 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
690
691 // Use ANDPS and ORPS to simulate FCOPYSIGN.
692 if (UseX87)
693 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
694 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
695
696 // We don't support sin/cos/fmod
697 setOperationAction(ISD::FSIN , MVT::f32, Expand);
698 setOperationAction(ISD::FCOS , MVT::f32, Expand);
699 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
700
701 if (UseX87) {
702 // Always expand sin/cos functions even though x87 has an instruction.
703 setOperationAction(ISD::FSIN, MVT::f64, Expand);
704 setOperationAction(ISD::FCOS, MVT::f64, Expand);
705 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
706 }
707 } else if (UseX87) {
708 // f32 and f64 in x87.
709 // Set up the FP register classes.
710 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
711 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
712
713 for (auto VT : { MVT::f32, MVT::f64 }) {
714 setOperationAction(ISD::UNDEF, VT, Expand);
715 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
716
717 // Always expand sin/cos functions even though x87 has an instruction.
718 setOperationAction(ISD::FSIN , VT, Expand);
719 setOperationAction(ISD::FCOS , VT, Expand);
720 setOperationAction(ISD::FSINCOS, VT, Expand);
721 }
722 }
723
724 // Expand FP32 immediates into loads from the stack, save special cases.
725 if (isTypeLegal(MVT::f32)) {
726 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
727 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
728 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
729 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
730 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
731 } else // SSE immediates.
732 addLegalFPImmediate(APFloat(+0.0f)); // xorps
733 }
734 // Expand FP64 immediates into loads from the stack, save special cases.
735 if (isTypeLegal(MVT::f64)) {
736 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
737 addLegalFPImmediate(APFloat(+0.0)); // FLD0
738 addLegalFPImmediate(APFloat(+1.0)); // FLD1
739 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
740 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
741 } else // SSE immediates.
742 addLegalFPImmediate(APFloat(+0.0)); // xorpd
743 }
744 // Support fp16 0 immediate.
745 if (isTypeLegal(MVT::f16))
746 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
747
748 // Handle constrained floating-point operations of scalar.
749 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
750 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
751 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
752 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
753 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
754 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
755 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
756 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
757 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
758 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
759 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
760 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
761
762 // We don't support FMA.
763 setOperationAction(ISD::FMA, MVT::f64, Expand);
764 setOperationAction(ISD::FMA, MVT::f32, Expand);
765
766 // f80 always uses X87.
767 if (UseX87) {
768 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
769 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
770 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
771 {
772 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
773 addLegalFPImmediate(TmpFlt); // FLD0
774 TmpFlt.changeSign();
775 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
776
777 bool ignored;
778 APFloat TmpFlt2(+1.0);
779 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
780 &ignored);
781 addLegalFPImmediate(TmpFlt2); // FLD1
782 TmpFlt2.changeSign();
783 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
784 }
785
786 // Always expand sin/cos functions even though x87 has an instruction.
787 setOperationAction(ISD::FSIN , MVT::f80, Expand);
788 setOperationAction(ISD::FCOS , MVT::f80, Expand);
789 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
790
791 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
792 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
793 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
794 setOperationAction(ISD::FRINT, MVT::f80, Expand);
795 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
796 setOperationAction(ISD::FMA, MVT::f80, Expand);
797 setOperationAction(ISD::LROUND, MVT::f80, Expand);
798 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
799 setOperationAction(ISD::LRINT, MVT::f80, Custom);
800 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
801
802 // Handle constrained floating-point operations of scalar.
803 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
804 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
805 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
806 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
807 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
808 if (isTypeLegal(MVT::f16)) {
809 setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
810 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
811 } else {
812 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
813 }
814 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
815 // as Custom.
816 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
817 }
818
819 // f128 uses xmm registers, but most operations require libcalls.
820 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
821 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
822 : &X86::VR128RegClass);
823
824 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
825
826 setOperationAction(ISD::FADD, MVT::f128, LibCall);
827 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
828 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
829 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
830 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
831 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
832 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
833 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
834 setOperationAction(ISD::FMA, MVT::f128, LibCall);
835 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
836
837 setOperationAction(ISD::FABS, MVT::f128, Custom);
838 setOperationAction(ISD::FNEG, MVT::f128, Custom);
839 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
840
841 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
842 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
843 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
844 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
845 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
846 // No STRICT_FSINCOS
847 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
848 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
849
850 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
851 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
852 // We need to custom handle any FP_ROUND with an f128 input, but
853 // LegalizeDAG uses the result type to know when to run a custom handler.
854 // So we have to list all legal floating point result types here.
855 if (isTypeLegal(MVT::f32)) {
856 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
857 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
858 }
859 if (isTypeLegal(MVT::f64)) {
860 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
861 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
862 }
863 if (isTypeLegal(MVT::f80)) {
864 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
865 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
866 }
867
868 setOperationAction(ISD::SETCC, MVT::f128, Custom);
869
870 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
871 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
872 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
873 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
874 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
875 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
876 }
877
878 // Always use a library call for pow.
879 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
880 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
881 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
882 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
883
884 setOperationAction(ISD::FLOG, MVT::f80, Expand);
885 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
886 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
887 setOperationAction(ISD::FEXP, MVT::f80, Expand);
888 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
889 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
890 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
891
892 // Some FP actions are always expanded for vector types.
893 for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
894 MVT::v4f32, MVT::v8f32, MVT::v16f32,
895 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
896 setOperationAction(ISD::FSIN, VT, Expand);
897 setOperationAction(ISD::FSINCOS, VT, Expand);
898 setOperationAction(ISD::FCOS, VT, Expand);
899 setOperationAction(ISD::FREM, VT, Expand);
900 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
901 setOperationAction(ISD::FPOW, VT, Expand);
902 setOperationAction(ISD::FLOG, VT, Expand);
903 setOperationAction(ISD::FLOG2, VT, Expand);
904 setOperationAction(ISD::FLOG10, VT, Expand);
905 setOperationAction(ISD::FEXP, VT, Expand);
906 setOperationAction(ISD::FEXP2, VT, Expand);
907 }
908
909 // First set operation action for all vector types to either promote
910 // (for widening) or expand (for scalarization). Then we will selectively
911 // turn on ones that can be effectively codegen'd.
912 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
913 setOperationAction(ISD::SDIV, VT, Expand);
914 setOperationAction(ISD::UDIV, VT, Expand);
915 setOperationAction(ISD::SREM, VT, Expand);
916 setOperationAction(ISD::UREM, VT, Expand);
917 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
918 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
919 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
920 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
921 setOperationAction(ISD::FMA, VT, Expand);
922 setOperationAction(ISD::FFLOOR, VT, Expand);
923 setOperationAction(ISD::FCEIL, VT, Expand);
924 setOperationAction(ISD::FTRUNC, VT, Expand);
925 setOperationAction(ISD::FRINT, VT, Expand);
926 setOperationAction(ISD::FNEARBYINT, VT, Expand);
927 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
928 setOperationAction(ISD::MULHS, VT, Expand);
929 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
930 setOperationAction(ISD::MULHU, VT, Expand);
931 setOperationAction(ISD::SDIVREM, VT, Expand);
932 setOperationAction(ISD::UDIVREM, VT, Expand);
933 setOperationAction(ISD::CTPOP, VT, Expand);
934 setOperationAction(ISD::CTTZ, VT, Expand);
935 setOperationAction(ISD::CTLZ, VT, Expand);
936 setOperationAction(ISD::ROTL, VT, Expand);
937 setOperationAction(ISD::ROTR, VT, Expand);
938 setOperationAction(ISD::BSWAP, VT, Expand);
939 setOperationAction(ISD::SETCC, VT, Expand);
940 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
941 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
942 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
943 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
944 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
945 setOperationAction(ISD::TRUNCATE, VT, Expand);
946 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
947 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
948 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
949 setOperationAction(ISD::SELECT_CC, VT, Expand);
950 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
951 setTruncStoreAction(InnerVT, VT, Expand);
952
953 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
954 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
955
956 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
957 // types, we have to deal with them whether we ask for Expansion or not.
958 // Setting Expand causes its own optimisation problems though, so leave
959 // them legal.
960 if (VT.getVectorElementType() == MVT::i1)
961 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
962
963 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
964 // split/scalarized right now.
965 if (VT.getVectorElementType() == MVT::f16 ||
966 VT.getVectorElementType() == MVT::bf16)
967 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
968 }
969 }
970
971 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
972 // with -msoft-float, disable use of MMX as well.
973 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
974 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
975 // No operations on x86mmx supported, everything uses intrinsics.
976 }
977
978 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
979 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
980 : &X86::VR128RegClass);
981
982 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
983 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
984 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
985 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
986 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
987 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
988 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
989 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
990
991 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
992 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
993
994 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
995 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
996 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
997 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
998 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
999 }
1000
1001 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
1002 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1003 : &X86::VR128RegClass);
1004
1005 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
1006 // registers cannot be used even for integer operations.
1007 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
1008 : &X86::VR128RegClass);
1009 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1010 : &X86::VR128RegClass);
1011 addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1012 : &X86::VR128RegClass);
1013 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1014 : &X86::VR128RegClass);
1015 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1016 : &X86::VR128RegClass);
1017
1018 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
1019 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
1020 setOperationAction(ISD::SDIV, VT, Custom);
1021 setOperationAction(ISD::SREM, VT, Custom);
1022 setOperationAction(ISD::UDIV, VT, Custom);
1023 setOperationAction(ISD::UREM, VT, Custom);
1024 }
1025
1026 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
1027 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
1028 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
1029
1030 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1031 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1032 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1033 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
1034 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
1035 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
1036 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
1037 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
1038 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
1039 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1040 setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
1041 setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
1042
1043 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
1044 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
1045 setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
1046
1047 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1048 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
1049 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
1050
1051 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1052 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
1053 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
1054 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
1055 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
1056 }
1057
1058 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
1059 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
1060 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
1061 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
1062 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
1063 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
1064 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
1065 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
1066 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
1067 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
1068
1069 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1070 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1071 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1072 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1073
1074 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1075 setOperationAction(ISD::SETCC, VT, Custom);
1076 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1077 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1078 setOperationAction(ISD::CTPOP, VT, Custom);
1079 setOperationAction(ISD::ABS, VT, Custom);
1080
1081 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1082 // setcc all the way to isel and prefer SETGT in some isel patterns.
1083 setCondCodeAction(ISD::SETLT, VT, Custom);
1084 setCondCodeAction(ISD::SETLE, VT, Custom);
1085 }
1086
1087 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1088 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1089 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1090 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1091 setOperationAction(ISD::VSELECT, VT, Custom);
1092 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1093 }
1094
1095 for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
1096 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1097 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1098 setOperationAction(ISD::VSELECT, VT, Custom);
1099
1100 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1101 continue;
1102
1103 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1104 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1105 }
1106 setF16Action(MVT::v8f16, Expand);
1107 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
1108 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
1109 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
1110 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
1111
1112 // Custom lower v2i64 and v2f64 selects.
1113 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1114 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1115 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
1116 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
1117 setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
1118 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
1119
1120 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
1121 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
1122 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1123 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1124 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
1125 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1126
1127 // Custom legalize these to avoid over promotion or custom promotion.
1128 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1129 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1130 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1131 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1132 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1133 }
1134
1135 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1136 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
1137 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1138 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1139
1140 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1141 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1142
1143 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1144 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1145
1146 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1147 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1148 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1149 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1150 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1151
1152 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1153 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1154 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1155 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1156
1157 // We want to legalize this to an f64 load rather than an i64 load on
1158 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1159 // store.
1160 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1161 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1162 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1163 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1164 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1165 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1166
1167 // Add 32-bit vector stores to help vectorization opportunities.
1168 setOperationAction(ISD::STORE, MVT::v2i16, Custom);
1169 setOperationAction(ISD::STORE, MVT::v4i8, Custom);
1170
1171 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1172 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1173 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1174 if (!Subtarget.hasAVX512())
1175 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1176
1177 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1178 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1179 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1180
1181 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1182
1183 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1184 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1185 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1186 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1187 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1188 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1189
1190 // In the customized shift lowering, the legal v4i32/v2i64 cases
1191 // in AVX2 will be recognized.
1192 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1193 setOperationAction(ISD::SRL, VT, Custom);
1194 setOperationAction(ISD::SHL, VT, Custom);
1195 setOperationAction(ISD::SRA, VT, Custom);
1196 if (VT == MVT::v2i64) continue;
1197 setOperationAction(ISD::ROTL, VT, Custom);
1198 setOperationAction(ISD::ROTR, VT, Custom);
1199 setOperationAction(ISD::FSHL, VT, Custom);
1200 setOperationAction(ISD::FSHR, VT, Custom);
1201 }
1202
1203 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1204 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1205 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1206 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1207 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1208 }
1209
1210 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1211 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1212 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1213 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1214 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1215 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1216 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1217 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1218 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1219
1220 // These might be better off as horizontal vector ops.
1221 setOperationAction(ISD::ADD, MVT::i16, Custom);
1222 setOperationAction(ISD::ADD, MVT::i32, Custom);
1223 setOperationAction(ISD::SUB, MVT::i16, Custom);
1224 setOperationAction(ISD::SUB, MVT::i32, Custom);
1225 }
1226
1227 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1228 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1229 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1230 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1231 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1232 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1233 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1234 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1235 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1236 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1237 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1238 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1239 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1240 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1241
1242 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1243 }
1244
1245 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1246 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1247 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1248 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1249 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1250 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1251 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1252 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1253
1254 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1255 setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
1256 setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
1257
1258 // FIXME: Do we need to handle scalar-to-vector here?
1259 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1260 setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
1261
1262 // We directly match byte blends in the backend as they match the VSELECT
1263 // condition form.
1264 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1265
1266 // SSE41 brings specific instructions for doing vector sign extend even in
1267 // cases where we don't have SRA.
1268 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1269 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1270 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1271 }
1272
1273 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1274 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1275 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1276 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1277 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1278 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1279 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1280 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1281 }
1282
1283 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1284 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1285 // do the pre and post work in the vector domain.
1286 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1287 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1288 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1289 // so that DAG combine doesn't try to turn it into uint_to_fp.
1290 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1291 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1292 }
1293 }
1294
1295 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1296 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1297 }
1298
1299 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1300 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1301 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1302 setOperationAction(ISD::ROTL, VT, Custom);
1303 setOperationAction(ISD::ROTR, VT, Custom);
1304 }
1305
1306 // XOP can efficiently perform BITREVERSE with VPPERM.
1307 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1308 setOperationAction(ISD::BITREVERSE, VT, Custom);
1309
1310 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1311 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1312 setOperationAction(ISD::BITREVERSE, VT, Custom);
1313 }
1314
1315 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1316 bool HasInt256 = Subtarget.hasInt256();
1317
1318 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1319 : &X86::VR256RegClass);
1320 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1321 : &X86::VR256RegClass);
1322 addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1323 : &X86::VR256RegClass);
1324 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1325 : &X86::VR256RegClass);
1326 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1327 : &X86::VR256RegClass);
1328 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1329 : &X86::VR256RegClass);
1330 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1331 : &X86::VR256RegClass);
1332
1333 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1334 setOperationAction(ISD::FFLOOR, VT, Legal);
1335 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1336 setOperationAction(ISD::FCEIL, VT, Legal);
1337 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1338 setOperationAction(ISD::FTRUNC, VT, Legal);
1339 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1340 setOperationAction(ISD::FRINT, VT, Legal);
1341 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1342 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1343 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1344 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1345 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1346
1347 setOperationAction(ISD::FROUND, VT, Custom);
1348
1349 setOperationAction(ISD::FNEG, VT, Custom);
1350 setOperationAction(ISD::FABS, VT, Custom);
1351 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1352 }
1353
1354 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1355 // even though v8i16 is a legal type.
1356 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1357 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1358 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1359 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1360 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
1361 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
1362 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
1363
1364 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
1365 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
1366 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
1367 setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
1368 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
1369 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
1370
1371 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1372 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1373 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1374 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1375 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1376 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1377 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1378 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1379 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1380 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1381 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1382
1383 if (!Subtarget.hasAVX512())
1384 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1385
1386 // In the customized shift lowering, the legal v8i32/v4i64 cases
1387 // in AVX2 will be recognized.
1388 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1389 setOperationAction(ISD::SRL, VT, Custom);
1390 setOperationAction(ISD::SHL, VT, Custom);
1391 setOperationAction(ISD::SRA, VT, Custom);
1392 if (VT == MVT::v4i64) continue;
1393 setOperationAction(ISD::ROTL, VT, Custom);
1394 setOperationAction(ISD::ROTR, VT, Custom);
1395 setOperationAction(ISD::FSHL, VT, Custom);
1396 setOperationAction(ISD::FSHR, VT, Custom);
1397 }
1398
1399 // These types need custom splitting if their input is a 128-bit vector.
1400 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1401 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1402 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1403 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1404
1405 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1406 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1407 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1408 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1409 setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
1410 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1411 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1412
1413 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1414 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1415 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1416 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1417 }
1418
1419 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1420 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1421 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1422 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1423
1424 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1425 setOperationAction(ISD::SETCC, VT, Custom);
1426 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1427 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1428 setOperationAction(ISD::CTPOP, VT, Custom);
1429 setOperationAction(ISD::CTLZ, VT, Custom);
1430
1431 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1432 // setcc all the way to isel and prefer SETGT in some isel patterns.
1433 setCondCodeAction(ISD::SETLT, VT, Custom);
1434 setCondCodeAction(ISD::SETLE, VT, Custom);
1435 }
1436
1437 if (Subtarget.hasAnyFMA()) {
1438 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1439 MVT::v2f64, MVT::v4f64 }) {
1440 setOperationAction(ISD::FMA, VT, Legal);
1441 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1442 }
1443 }
1444
1445 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1446 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1447 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1448 }
1449
1450 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1451 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1452 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1453 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1454
1455 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1456 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1457 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1458 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1459 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1460 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1461 setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
1462 setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
1463
1464 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1465 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1466
1467 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1468 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1469 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1470 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1471 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1472
1473 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1474 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1475 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1476 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1477 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1478 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1479 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1480 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1481 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1482 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1483 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1484 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1485
1486 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1487 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1488 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1489 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1490 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1491 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1492 }
1493
1494 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1495 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1496 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1497 }
1498
1499 if (HasInt256) {
1500 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1501 // when we have a 256-bit-wide blend with immediate.
1502 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1503 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1504
1505 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1506 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1507 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1508 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1509 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1510 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1511 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1512 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1513 }
1514 }
1515
1516 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1517 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1518 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1519 setOperationAction(ISD::MSTORE, VT, Legal);
1520 }
1521
1522 // Extract subvector is special because the value type
1523 // (result) is 128-bit but the source is 256-bit wide.
1524 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1525 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1526 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1527 }
1528
1529 // Custom lower several nodes for 256-bit types.
1530 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1531 MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
1532 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1533 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1534 setOperationAction(ISD::VSELECT, VT, Custom);
1535 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1536 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1537 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1538 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1539 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1540 setOperationAction(ISD::STORE, VT, Custom);
1541 }
1542 setF16Action(MVT::v16f16, Expand);
1543 setOperationAction(ISD::FADD, MVT::v16f16, Expand);
1544 setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
1545 setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
1546 setOperationAction(ISD::FDIV, MVT::v16f16, Expand);
1547
1548 if (HasInt256) {
1549 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1550
1551 // Custom legalize 2x32 to get a little better code.
1552 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1553 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1554
1555 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1556 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1557 setOperationAction(ISD::MGATHER, VT, Custom);
1558 }
1559 }
1560
1561 if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
1562 Subtarget.hasF16C()) {
1563 for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
1564 setOperationAction(ISD::FP_ROUND, VT, Custom);
1565 setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1566 }
1567 for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) {
1568 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1569 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
1570 }
1571 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1572 setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
1573 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1574 }
1575
1576 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1577 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
1578 }
1579
1580 // This block controls legalization of the mask vector sizes that are
1581 // available with AVX512. 512-bit vectors are in a separate block controlled
1582 // by useAVX512Regs.
1583 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1584 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1585 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1586 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1587 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1588 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1589
1590 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1591 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1592 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1593
1594 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1595 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1596 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1597 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1598 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1599 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1600 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1601 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1602 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1603 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1604 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1605 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1606
1607 // There is no byte-sized k-register load or store without AVX512DQ.
1608 if (!Subtarget.hasDQI()) {
1609 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1610 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1611 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1612 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1613
1614 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1615 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1616 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1617 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1618 }
1619
1620 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1621 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1622 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1623 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1624 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1625 }
1626
1627 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1628 setOperationAction(ISD::VSELECT, VT, Expand);
1629
1630 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1631 setOperationAction(ISD::SETCC, VT, Custom);
1632 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1633 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1634 setOperationAction(ISD::SELECT, VT, Custom);
1635 setOperationAction(ISD::TRUNCATE, VT, Custom);
1636
1637 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1638 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1639 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1640 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1641 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1642 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1643 }
1644
1645 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1646 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1647 }
1648
1649 // This block controls legalization for 512-bit operations with 32/64 bit
1650 // elements. 512-bits can be disabled based on prefer-vector-width and
1651 // required-vector-width function attributes.
1652 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1653 bool HasBWI = Subtarget.hasBWI();
1654
1655 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1656 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1657 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1658 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1659 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1660 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1661 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1662
1663 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1664 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1665 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1666 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1667 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1668 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1669 if (HasBWI)
1670 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1671 }
1672
1673 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1674 setOperationAction(ISD::FNEG, VT, Custom);
1675 setOperationAction(ISD::FABS, VT, Custom);
1676 setOperationAction(ISD::FMA, VT, Legal);
1677 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1678 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1679 }
1680
1681 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1682 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1683 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1684 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1685 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1686 }
1687 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom);
1688 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom);
1689 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom);
1690 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom);
1691 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
1692 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
1693 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
1694 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom);
1695 setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
1696 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
1697
1698 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1699 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1700 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1701 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1702 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1703 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1704 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1705 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1706 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1707 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1708 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1709
1710 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1711 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1712 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1713 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1714 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1715 if (HasBWI)
1716 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1717
1718 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1719 // to 512-bit rather than use the AVX2 instructions so that we can use
1720 // k-masks.
1721 if (!Subtarget.hasVLX()) {
1722 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1723 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1724 setOperationAction(ISD::MLOAD, VT, Custom);
1725 setOperationAction(ISD::MSTORE, VT, Custom);
1726 }
1727 }
1728
1729 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1730 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1731 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1732 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1733 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1734 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1735 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1736 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1737 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1738 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1739 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1740 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1741 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1742
1743 if (HasBWI) {
1744 // Extends from v64i1 masks to 512-bit vectors.
1745 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1746 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1747 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1748 }
1749
1750 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1751 setOperationAction(ISD::FFLOOR, VT, Legal);
1752 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1753 setOperationAction(ISD::FCEIL, VT, Legal);
1754 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1755 setOperationAction(ISD::FTRUNC, VT, Legal);
1756 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1757 setOperationAction(ISD::FRINT, VT, Legal);
1758 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1759 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1760 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1761 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1762 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1763
1764 setOperationAction(ISD::FROUND, VT, Custom);
1765 }
1766
1767 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1768 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1769 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1770 }
1771
1772 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1773 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1774 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1775 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1776
1777 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1778 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1779 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1780 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1781
1782 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1783 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1784 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1785 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1786 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1787 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1788 setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
1789 setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
1790
1791 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1792 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1793
1794 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1795
1796 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1797 setOperationAction(ISD::SRL, VT, Custom);
1798 setOperationAction(ISD::SHL, VT, Custom);
1799 setOperationAction(ISD::SRA, VT, Custom);
1800 setOperationAction(ISD::ROTL, VT, Custom);
1801 setOperationAction(ISD::ROTR, VT, Custom);
1802 setOperationAction(ISD::SETCC, VT, Custom);
1803
1804 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1805 // setcc all the way to isel and prefer SETGT in some isel patterns.
1806 setCondCodeAction(ISD::SETLT, VT, Custom);
1807 setCondCodeAction(ISD::SETLE, VT, Custom);
1808 }
1809 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1810 setOperationAction(ISD::SMAX, VT, Legal);
1811 setOperationAction(ISD::UMAX, VT, Legal);
1812 setOperationAction(ISD::SMIN, VT, Legal);
1813 setOperationAction(ISD::UMIN, VT, Legal);
1814 setOperationAction(ISD::ABS, VT, Legal);
1815 setOperationAction(ISD::CTPOP, VT, Custom);
1816 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1817 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1818 }
1819
1820 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1821 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1822 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1823 setOperationAction(ISD::CTLZ, VT, Custom);
1824 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1825 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1826 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1827 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1828 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1829 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1830 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1831 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1832 }
1833
1834 setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
1835 setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
1836 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1837 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1838 setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
1839 setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
1840
1841 if (Subtarget.hasDQI()) {
1842 for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
1843 ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1844 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
1845 setOperationAction(Opc, MVT::v8i64, Custom);
1846 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1847 }
1848
1849 if (Subtarget.hasCDI()) {
1850 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1851 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1852 setOperationAction(ISD::CTLZ, VT, Legal);
1853 }
1854 } // Subtarget.hasCDI()
1855
1856 if (Subtarget.hasVPOPCNTDQ()) {
1857 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1858 setOperationAction(ISD::CTPOP, VT, Legal);
1859 }
1860
1861 // Extract subvector is special because the value type
1862 // (result) is 256-bit but the source is 512-bit wide.
1863 // 128-bit was made Legal under AVX1.
1864 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1865 MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1866 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1867
1868 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1869 MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
1870 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1871 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1872 setOperationAction(ISD::SELECT, VT, Custom);
1873 setOperationAction(ISD::VSELECT, VT, Custom);
1874 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1875 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1876 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1877 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1878 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1879 }
1880 setF16Action(MVT::v32f16, Expand);
1881 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom);
1882 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom);
1883 setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
1884 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
1885 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1886 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1887 setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
1888 }
1889
1890 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1891 setOperationAction(ISD::MLOAD, VT, Legal);
1892 setOperationAction(ISD::MSTORE, VT, Legal);
1893 setOperationAction(ISD::MGATHER, VT, Custom);
1894 setOperationAction(ISD::MSCATTER, VT, Custom);
1895 }
1896 if (HasBWI) {
1897 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1898 setOperationAction(ISD::MLOAD, VT, Legal);
1899 setOperationAction(ISD::MSTORE, VT, Legal);
1900 }
1901 } else {
1902 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1903 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1904 }
1905
1906 if (Subtarget.hasVBMI2()) {
1907 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1908 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1909 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1910 setOperationAction(ISD::FSHL, VT, Custom);
1911 setOperationAction(ISD::FSHR, VT, Custom);
1912 }
1913
1914 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1915 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1916 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1917 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1918 }
1919 }// useAVX512Regs
1920
1921 // This block controls legalization for operations that don't have
1922 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1923 // narrower widths.
1924 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1925 // These operations are handled on non-VLX by artificially widening in
1926 // isel patterns.
1927
1928 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
1929 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
1930 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1931
1932 if (Subtarget.hasDQI()) {
1933 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1934 // v2f32 UINT_TO_FP is already custom under SSE2.
1935 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
1936        isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
1937        "Unexpected operation action!");
1938 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1939 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1940 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1941 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1942 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1943 }
1944
1945 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1946 setOperationAction(ISD::SMAX, VT, Legal);
1947 setOperationAction(ISD::UMAX, VT, Legal);
1948 setOperationAction(ISD::SMIN, VT, Legal);
1949 setOperationAction(ISD::UMIN, VT, Legal);
1950 setOperationAction(ISD::ABS, VT, Legal);
1951 }
1952
1953 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1954 setOperationAction(ISD::ROTL, VT, Custom);
1955 setOperationAction(ISD::ROTR, VT, Custom);
1956 }
1957
1958 // Custom legalize 2x32 to get a little better code.
1959 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1960 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1961
1962 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1963 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1964 setOperationAction(ISD::MSCATTER, VT, Custom);
1965
1966 if (Subtarget.hasDQI()) {
1967 for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
1968 ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1969 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) {
1970 setOperationAction(Opc, MVT::v2i64, Custom);
1971 setOperationAction(Opc, MVT::v4i64, Custom);
1972 }
1973 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1974 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1975 }
1976
1977 if (Subtarget.hasCDI()) {
1978 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1979 setOperationAction(ISD::CTLZ, VT, Legal);
1980 }
1981 } // Subtarget.hasCDI()
1982
1983 if (Subtarget.hasVPOPCNTDQ()) {
1984 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1985 setOperationAction(ISD::CTPOP, VT, Legal);
1986 }
1987 }
1988
1989 // This block controls legalization of v32i1/v64i1, which are available with
1990 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1991 // useBWIRegs.
1992 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1993 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1994 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1995
1996 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1997 setOperationAction(ISD::VSELECT, VT, Expand);
1998 setOperationAction(ISD::TRUNCATE, VT, Custom);
1999 setOperationAction(ISD::SETCC, VT, Custom);
2000 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
2001 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
2002 setOperationAction(ISD::SELECT, VT, Custom);
2003 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2004 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2005 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
2006 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
2007 }
2008
2009 for (auto VT : { MVT::v16i1, MVT::v32i1 })
2010 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
2011
2012 // Extends from v32i1 masks to 256-bit vectors.
2013 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
2014 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
2015 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
2016
2017 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
2018 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
2019 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
2020 }
2021
2022 // These operations are handled on non-VLX by artificially widening in
2023 // isel patterns.
2024 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
2025
2026 if (Subtarget.hasBITALG()) {
2027 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
2028 setOperationAction(ISD::CTPOP, VT, Legal);
2029 }
2030 }
2031
2032 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
2033 auto setGroup = [&] (MVT VT) {
2034 setOperationAction(ISD::FADD, VT, Legal);
2035 setOperationAction(ISD::STRICT_FADD, VT, Legal);
2036 setOperationAction(ISD::FSUB, VT, Legal);
2037 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
2038 setOperationAction(ISD::FMUL, VT, Legal);
2039 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
2040 setOperationAction(ISD::FDIV, VT, Legal);
2041 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
2042 setOperationAction(ISD::FSQRT, VT, Legal);
2043 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
2044
2045 setOperationAction(ISD::FFLOOR, VT, Legal);
2046 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
2047 setOperationAction(ISD::FCEIL, VT, Legal);
2048 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
2049 setOperationAction(ISD::FTRUNC, VT, Legal);
2050 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
2051 setOperationAction(ISD::FRINT, VT, Legal);
2052 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
2053 setOperationAction(ISD::FNEARBYINT, VT, Legal);
2054 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
2055
2056 setOperationAction(ISD::LOAD, VT, Legal);
2057 setOperationAction(ISD::STORE, VT, Legal);
2058
2059 setOperationAction(ISD::FMA, VT, Legal);
2060 setOperationAction(ISD::STRICT_FMA, VT, Legal);
2061 setOperationAction(ISD::VSELECT, VT, Legal);
2062 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2063 setOperationAction(ISD::SELECT, VT, Custom);
2064
2065 setOperationAction(ISD::FNEG, VT, Custom);
2066 setOperationAction(ISD::FABS, VT, Custom);
2067 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
2068 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
2069 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2070 };
2071
2072 // AVX512_FP16 scalar operations
2073 setGroup(MVT::f16);
2074 setOperationAction(ISD::FREM, MVT::f16, Promote);
2075 setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
2076 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
2077 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
2078 setOperationAction(ISD::SETCC, MVT::f16, Custom);
2079 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
2080 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
2081 setOperationAction(ISD::FROUND, MVT::f16, Custom);
2082 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
2083 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
2084 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
2085 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
2086 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
2087 setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
2088 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
2089
2090 setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
2091 setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
2092
2093 if (Subtarget.useAVX512Regs()) {
2094 setGroup(MVT::v32f16);
2095 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
2096 setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
2097 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
2098 setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
2099 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
2100 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
2101 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
2102 setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
2103 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
2104 setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal);
2105 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
2106 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
2107
2108 setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
2109 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
2110 setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
2111 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
2112 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
2113 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
2114 MVT::v32i16);
2115 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
2116 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
2117 MVT::v32i16);
2118 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
2119 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
2120 MVT::v32i16);
2121 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
2122 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
2123 MVT::v32i16);
2124
2125 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
2126 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
2127 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
2128
2129 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
2130 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2131
2132 setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
2133 setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
2134 }
2135
2136 if (Subtarget.hasVLX()) {
2137 setGroup(MVT::v8f16);
2138 setGroup(MVT::v16f16);
2139
2140 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
2141 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
2142 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
2143 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
2144 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
2145 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
2146 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
2147 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
2148 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
2149 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
2150
2151 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
2152 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
2153 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
2154 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
2155 setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
2156 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
2157 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
2158 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
2159 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
2160 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
2161
2162 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2163 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
2164 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
2165
2166 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
2167 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
2168 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
2169
2170 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2171 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2172 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2173 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2174
2175 // Need to custom widen these to prevent scalarization.
2176 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2177 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2178 }
2179 }
2180
2181 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2182 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2183 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2184 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2185 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2186 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2187
2188 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2189 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2190 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2191 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2192 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2193
2194 if (Subtarget.hasBWI()) {
2195 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2196 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2197 }
2198
2199 if (Subtarget.hasFP16()) {
2200 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2201 setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
2202 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
2203 setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
2204 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
2205 setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
2206 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
2207 setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
2208 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
2209 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2210 setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
2211 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
2212 setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
2213 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
2214 setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
2215 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
2216 setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
2217 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
2218 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2219 setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
2220 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
2221 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
2222 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
2223 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2224 setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
2225 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
2226 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
2227 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
2228 }
2229
2230 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
2231 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
2232 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
2233 }
2234
2235 if (Subtarget.hasAMXTILE()) {
2236 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2237 }
2238
2239 // We want to custom lower some of our intrinsics.
2240 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
2241 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
2242 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
2243 if (!Subtarget.is64Bit()) {
2244 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
2245 }
2246
2247 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2248 // handle type legalization for these operations here.
2249 //
2250 // FIXME: We really should do custom legalization for addition and
2251 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2252 // than generic legalization for 64-bit multiplication-with-overflow, though.
2253 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2254 if (VT == MVT::i64 && !Subtarget.is64Bit())
2255 continue;
2256 // Add/Sub/Mul with overflow operations are custom lowered.
2257 setOperationAction(ISD::SADDO, VT, Custom);
2258 setOperationAction(ISD::UADDO, VT, Custom);
2259 setOperationAction(ISD::SSUBO, VT, Custom);
2260 setOperationAction(ISD::USUBO, VT, Custom);
2261 setOperationAction(ISD::SMULO, VT, Custom);
2262 setOperationAction(ISD::UMULO, VT, Custom);
2263
2264 // Support carry in as value rather than glue.
2265 setOperationAction(ISD::ADDCARRY, VT, Custom);
2266 setOperationAction(ISD::SUBCARRY, VT, Custom);
2267 setOperationAction(ISD::SETCCCARRY, VT, Custom);
2268 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
2269 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
2270 }
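// Illustrative note, not part of this file: ISD::SADDO and friends produce a
// {value, overflow} pair, e.g. the IR intrinsic
//   %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
// which the custom lowering set up above turns into, roughly, a flag-producing
// add followed by a setcc on the overflow condition.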
2271
2272 if (!Subtarget.is64Bit()) {
2273 // These libcalls are not available in 32-bit.
2274 setLibcallName(RTLIB::SHL_I128, nullptr);
2275 setLibcallName(RTLIB::SRL_I128, nullptr);
2276 setLibcallName(RTLIB::SRA_I128, nullptr);
2277 setLibcallName(RTLIB::MUL_I128, nullptr);
2278 // The MULO libcall is not part of libgcc, only compiler-rt.
2279 setLibcallName(RTLIB::MULO_I64, nullptr);
2280 }
2281 // The MULO libcall is not part of libgcc, only compiler-rt.
2282 setLibcallName(RTLIB::MULO_I128, nullptr);
2283
2284 // Combine sin / cos into _sincos_stret if it is available.
2285 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2286 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2287 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
2288 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
2289 }
2290
2291 if (Subtarget.isTargetWin64()) {
2292 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2293 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2294 setOperationAction(ISD::SREM, MVT::i128, Custom);
2295 setOperationAction(ISD::UREM, MVT::i128, Custom);
2296 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
2297 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
2298 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
2299 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
2300 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
2301 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
2302 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
2303 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
2304 }
2305
2306 // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2307 // is. We should promote the value to 64-bits to solve this.
2308 // This is what the CRT headers do - `fmodf` is an inline header
2309 // function casting to f64 and calling `fmod`.
2310 if (Subtarget.is32Bit() &&
2311 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2312 for (ISD::NodeType Op :
2313 {ISD::FCEIL, ISD::STRICT_FCEIL,
2314 ISD::FCOS, ISD::STRICT_FCOS,
2315 ISD::FEXP, ISD::STRICT_FEXP,
2316 ISD::FFLOOR, ISD::STRICT_FFLOOR,
2317 ISD::FREM, ISD::STRICT_FREM,
2318 ISD::FLOG, ISD::STRICT_FLOG,
2319 ISD::FLOG10, ISD::STRICT_FLOG10,
2320 ISD::FPOW, ISD::STRICT_FPOW,
2321 ISD::FSIN, ISD::STRICT_FSIN})
2322 if (isOperationExpand(Op, MVT::f32))
2323 setOperationAction(Op, MVT::f32, Promote);
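// Illustrative sketch, not part of this file: the Promote action above has
// roughly the same effect as the CRT's inline fmodf described in the comment,
// i.e. something like
//   static inline float fmodf_sketch(float X, float Y) {
//     return (float)fmod((double)X, (double)Y); // widen to f64, call fmod, narrow
//   }
// with the f32 operands widened to f64, the f64 libcall used, and the result
// rounded back to f32.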
2324
2325 // We have target-specific dag combine patterns for the following nodes:
2326 setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
2327 ISD::SCALAR_TO_VECTOR,
2328 ISD::INSERT_VECTOR_ELT,
2329 ISD::EXTRACT_VECTOR_ELT,
2330 ISD::CONCAT_VECTORS,
2331 ISD::INSERT_SUBVECTOR,
2332 ISD::EXTRACT_SUBVECTOR,
2333 ISD::BITCAST,
2334 ISD::VSELECT,
2335 ISD::SELECT,
2336 ISD::SHL,
2337 ISD::SRA,
2338 ISD::SRL,
2339 ISD::OR,
2340 ISD::AND,
2341 ISD::ADD,
2342 ISD::FADD,
2343 ISD::FSUB,
2344 ISD::FNEG,
2345 ISD::FMA,
2346 ISD::STRICT_FMA,
2347 ISD::FMINNUM,
2348 ISD::FMAXNUM,
2349 ISD::SUB,
2350 ISD::LOAD,
2351 ISD::MLOAD,
2352 ISD::STORE,
2353 ISD::MSTORE,
2354 ISD::TRUNCATE,
2355 ISD::ZERO_EXTEND,
2356 ISD::ANY_EXTEND,
2357 ISD::SIGN_EXTEND,
2358 ISD::SIGN_EXTEND_INREG,
2359 ISD::ANY_EXTEND_VECTOR_INREG,
2360 ISD::SIGN_EXTEND_VECTOR_INREG,
2361 ISD::ZERO_EXTEND_VECTOR_INREG,
2362 ISD::SINT_TO_FP,
2363 ISD::UINT_TO_FP,
2364 ISD::STRICT_SINT_TO_FP,
2365 ISD::STRICT_UINT_TO_FP,
2366 ISD::SETCC,
2367 ISD::MUL,
2368 ISD::XOR,
2369 ISD::MSCATTER,
2370 ISD::MGATHER,
2371 ISD::FP16_TO_FP,
2372 ISD::FP_EXTEND,
2373 ISD::STRICT_FP_EXTEND,
2374 ISD::FP_ROUND,
2375 ISD::STRICT_FP_ROUND});
2376
2377 computeRegisterProperties(Subtarget.getRegisterInfo());
2378
2379 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2380 MaxStoresPerMemsetOptSize = 8;
2381 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2382 MaxStoresPerMemcpyOptSize = 4;
2383 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2384 MaxStoresPerMemmoveOptSize = 4;
2385
2386 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2387 // that needs to be benchmarked and balanced with the potential use of vector
2388 // load/store types (PR33329, PR33914).
2389 MaxLoadsPerMemcmp = 2;
2390 MaxLoadsPerMemcmpOptSize = 2;
2391
2392 // Default loop alignment, which can be overridden by -align-loops.
2393 setPrefLoopAlignment(Align(16));
2394
2395 // An out-of-order CPU can speculatively execute past a predictable branch,
2396 // but a conditional move could be stalled by an expensive earlier operation.
2397 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2398 EnableExtLdPromotion = true;
2399 setPrefFunctionAlignment(Align(16));
2400
2401 verifyIntrinsicTables();
2402
2403 // Default to having -disable-strictnode-mutation on
2404 IsStrictFPEnabled = true;
2405}
2406
2407// This has so far only been implemented for 64-bit MachO.
2408bool X86TargetLowering::useLoadStackGuardNode() const {
2409 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2410}
2411
2412bool X86TargetLowering::useStackGuardXorFP() const {
2413 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2414 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2415}
2416
2417SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2418 const SDLoc &DL) const {
2419 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2420 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2421 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2422 return SDValue(Node, 0);
2423}
2424
2425TargetLoweringBase::LegalizeTypeAction
2426X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2427 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2428 !Subtarget.hasBWI())
2429 return TypeSplitVector;
2430
2431 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2432 !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)
2433 return TypeSplitVector;
2434
2435 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2436 VT.getVectorElementType() != MVT::i1)
2437 return TypeWidenVector;
2438
2439 return TargetLoweringBase::getPreferredVectorAction(VT);
2440}
2441
2442static std::pair<MVT, unsigned>
2443handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2444 const X86Subtarget &Subtarget) {
2445 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2446 // convention is one that uses k registers.
2447 if (NumElts == 2)
2448 return {MVT::v2i64, 1};
2449 if (NumElts == 4)
2450 return {MVT::v4i32, 1};
2451 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2452 CC != CallingConv::Intel_OCL_BI)
2453 return {MVT::v8i16, 1};
2454 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2455 CC != CallingConv::Intel_OCL_BI)
2456 return {MVT::v16i8, 1};
2457 // v32i1 passes in ymm unless we have BWI and the calling convention is
2458 // regcall.
2459 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2460 return {MVT::v32i8, 1};
2461 // Split v64i1 vectors if we don't have v64i8 available.
2462 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2463 if (Subtarget.useAVX512Regs())
2464 return {MVT::v64i8, 1};
2465 return {MVT::v32i8, 2};
2466 }
2467
2468 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2469 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2470 NumElts > 64)
2471 return {MVT::i8, NumElts};
2472
2473 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2474}
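// Illustrative mapping (assuming AVX512 and the plain C calling convention):
//   v16i1 -> one v16i8 in an xmm register, v32i1 -> one v32i8 in a ymm register,
//   v64i1 -> one v64i8 with BWI and 512-bit registers (two v32i8 with BWI but a
//   256-bit preference; 64 i8 scalars without BWI), v3i1 -> three i8 scalars
//   (non-power-of-2 widths are scalarized).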
2475
2476MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2477 CallingConv::ID CC,
2478 EVT VT) const {
2479 if (VT.isVector()) {
2480 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
2481 unsigned NumElts = VT.getVectorNumElements();
2482
2483 MVT RegisterVT;
2484 unsigned NumRegisters;
2485 std::tie(RegisterVT, NumRegisters) =
2486 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2487 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2488 return RegisterVT;
2489 }
2490
2491 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
2492 return MVT::v8f16;
2493 }
2494
2495 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
2496 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
2497 !Subtarget.hasX87())
2498 return MVT::i32;
2499
2500 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
2501 return getRegisterTypeForCallingConv(Context, CC,
2502 VT.changeVectorElementTypeToInteger());
2503
2504 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2505}
2506
2507unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2508 CallingConv::ID CC,
2509 EVT VT) const {
2510 if (VT.isVector()) {
2511 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
2512 unsigned NumElts = VT.getVectorNumElements();
2513
2514 MVT RegisterVT;
2515 unsigned NumRegisters;
2516 std::tie(RegisterVT, NumRegisters) =
2517 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2518 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2519 return NumRegisters;
2520 }
2521
2522 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
2523 return 1;
2524 }
2525
2526 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
2527 // x87 is disabled.
2528 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
2529 if (VT == MVT::f64)
2530 return 2;
2531 if (VT == MVT::f80)
2532 return 3;
2533 }
2534
2535 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
2536 return getNumRegistersForCallingConv(Context, CC,
2537 VT.changeVectorElementTypeToInteger());
2538
2539 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2540}
2541
2542unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2543 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2544 unsigned &NumIntermediates, MVT &RegisterVT) const {
2545 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2546 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2547 Subtarget.hasAVX512() &&
2548 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2549 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2550 VT.getVectorNumElements() > 64)) {
2551 RegisterVT = MVT::i8;
2552 IntermediateVT = MVT::i1;
2553 NumIntermediates = VT.getVectorNumElements();
2554 return NumIntermediates;
2555 }
2556
2557 // Split v64i1 vectors if we don't have v64i8 available.
2558 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2559 CC != CallingConv::X86_RegCall) {
2560 RegisterVT = MVT::v32i8;
2561 IntermediateVT = MVT::v32i1;
2562 NumIntermediates = 2;
2563 return 2;
2564 }
2565
2566 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2567 NumIntermediates, RegisterVT);
2568}
2569
2570EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2571 LLVMContext& Context,
2572 EVT VT) const {
2573 if (!VT.isVector())
2574 return MVT::i8;
2575
2576 if (Subtarget.hasAVX512()) {
2577 // Figure out what this type will be legalized to.
2578 EVT LegalVT = VT;
2579 while (getTypeAction(Context, LegalVT) != TypeLegal)
2580 LegalVT = getTypeToTransformTo(Context, LegalVT);
2581
2582 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2583 if (LegalVT.getSimpleVT().is512BitVector())
2584 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2585
2586 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2587 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2588 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2589 // vXi16/vXi8.
2590 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2591 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2592 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2593 }
2594 }
2595
2596 return VT.changeVectorElementTypeToInteger();
2597}
2598
2599/// Helper for getByValTypeAlignment to determine
2600/// the desired ByVal argument alignment.
2601static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2602 if (MaxAlign == 16)
2603 return;
2604 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2605 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2606 MaxAlign = Align(16);
2607 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2608 Align EltAlign;
2609 getMaxByValAlign(ATy->getElementType(), EltAlign);
2610 if (EltAlign > MaxAlign)
2611 MaxAlign = EltAlign;
2612 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2613 for (auto *EltTy : STy->elements()) {
2614 Align EltAlign;
2615 getMaxByValAlign(EltTy, EltAlign);
2616 if (EltAlign > MaxAlign)
2617 MaxAlign = EltAlign;
2618 if (MaxAlign == 16)
2619 break;
2620 }
2621 }
2622}
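// Example (illustrative): a struct such as { int i; __m128 v; } passed byval
// raises MaxAlign to 16 because it contains a 128-bit vector member; a struct
// of plain scalars keeps the default 4-byte alignment on 32-bit targets.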
2623
2624/// Return the desired alignment for ByVal aggregate
2625/// function arguments in the caller parameter area. For X86, aggregates
2626/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2627/// are at 4-byte boundaries.
2628uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
2629 const DataLayout &DL) const {
2630 if (Subtarget.is64Bit()) {
2631 // Max of 8 and alignment of type.
2632 Align TyAlign = DL.getABITypeAlign(Ty);
2633 if (TyAlign > 8)
2634 return TyAlign.value();
2635 return 8;
2636 }
2637
2638 Align Alignment(4);
2639 if (Subtarget.hasSSE1())
2640 getMaxByValAlign(Ty, Alignment);
2641 return Alignment.value();
2642}
2643
2644/// It returns EVT::Other if the type should be determined using generic
2645/// target-independent logic.
2646/// For vector ops we check that the overall size isn't larger than our
2647/// preferred vector width.
2648EVT X86TargetLowering::getOptimalMemOpType(
2649 const MemOp &Op, const AttributeList &FuncAttributes) const {
2650 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
2651 if (Op.size() >= 16 &&
2652 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2653 // FIXME: Check if unaligned 64-byte accesses are slow.
2654 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2655 (Subtarget.getPreferVectorWidth() >= 512)) {
2656 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2657 }
2658 // FIXME: Check if unaligned 32-byte accesses are slow.
2659 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2660 (Subtarget.getPreferVectorWidth() >= 256)) {
2661 // Although this isn't a well-supported type for AVX1, we'll let
2662 // legalization and shuffle lowering produce the optimal codegen. If we
2663 // choose an optimal type with a vector element larger than a byte,
2664 // getMemsetStores() may create an intermediate splat (using an integer
2665 // multiply) before we splat as a vector.
2666 return MVT::v32i8;
2667 }
2668 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2669 return MVT::v16i8;
2670 // TODO: Can SSE1 handle a byte vector?
2671 // If we have SSE1 registers we should be able to use them.
2672 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2673 (Subtarget.getPreferVectorWidth() >= 128))
2674 return MVT::v4f32;
2675 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2676 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2677 // Do not use f64 to lower memcpy if the source is a string constant. It's
2678 // better to use i32 to avoid the loads.
2679 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2680 // The gymnastics of splatting a byte value into an XMM register and then
2681 // only using 8-byte stores (because this is a CPU with slow unaligned
2682 // 16-byte accesses) makes that a loser.
2683 return MVT::f64;
2684 }
2685 }
2686 // This is a compromise. If we reach here, unaligned accesses may be slow on
2687 // this target. However, creating smaller, aligned accesses could be even
2688 // slower and would certainly be a lot more code.
2689 if (Subtarget.is64Bit() && Op.size() >= 8)
2690 return MVT::i64;
2691 return MVT::i32;
2692}
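// Rough examples of the choices above (assuming no NoImplicitFloat and fast
// unaligned accesses): a 64-byte memset with AVX-512 and a 512-bit preferred
// width uses v64i8 (v16i32 without BWI); a 32-byte copy with AVX uses v32i8;
// with only SSE2 it falls back to v16i8; otherwise scalar i64/i32 is used.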
2693
2694bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2695 if (VT == MVT::f32)
2696 return Subtarget.hasSSE1();
2697 if (VT == MVT::f64)
2698 return Subtarget.hasSSE2();
2699 return true;
2700}
2701
2702bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2703 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
2704 bool *Fast) const {
2705 if (Fast) {
2706 switch (VT.getSizeInBits()) {
2707 default:
2708 // 8-byte and under are always assumed to be fast.
2709 *Fast = true;
2710 break;
2711 case 128:
2712 *Fast = !Subtarget.isUnalignedMem16Slow();
2713 break;
2714 case 256:
2715 *Fast = !Subtarget.isUnalignedMem32Slow();
2716 break;
2717 // TODO: What about AVX-512 (512-bit) accesses?
2718 }
2719 }
2720 // NonTemporal vector memory ops must be aligned.
2721 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2722 // NT loads can only be vector aligned, so if it's less aligned than the
2723 // minimum vector size (which we can split the vector down to), we might as
2724 // well use a regular unaligned vector load.
2725 // We don't have any NT loads pre-SSE41.
2726 if (!!(Flags & MachineMemOperand::MOLoad))
2727 return (Alignment < 16 || !Subtarget.hasSSE41());
2728 return false;
2729 }
2730 // Misaligned accesses of any size are always allowed.
2731 return true;
2732}
2733
2734/// Return the entry encoding for a jump table in the
2735/// current function. The returned value is a member of the
2736/// MachineJumpTableInfo::JTEntryKind enum.
2737unsigned X86TargetLowering::getJumpTableEncoding() const {
2738 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2739 // symbol.
2740 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2741 return MachineJumpTableInfo::EK_Custom32;
2742
2743 // Otherwise, use the normal jump table encoding heuristics.
2744 return TargetLowering::getJumpTableEncoding();
2745}
2746
2747bool X86TargetLowering::splitValueIntoRegisterParts(
2748 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
2749 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
2750 bool IsABIRegCopy = CC.has_value();
2751 EVT ValueVT = Val.getValueType();
2752 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
2753 unsigned ValueBits = ValueVT.getSizeInBits();
2754 unsigned PartBits = PartVT.getSizeInBits();
2755 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
2756 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
2757 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
2758 Parts[0] = Val;
2759 return true;
2760 }
2761 return false;
2762}
2763
2764SDValue X86TargetLowering::joinRegisterPartsIntoValue(
2765 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
2766 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
2767 bool IsABIRegCopy = CC.has_value();
2768 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
2769 unsigned ValueBits = ValueVT.getSizeInBits();
2770 unsigned PartBits = PartVT.getSizeInBits();
2771 SDValue Val = Parts[0];
2772
2773 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
2774 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
2775 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
2776 return Val;
2777 }
2778 return SDValue();
2779}
2780
2781bool X86TargetLowering::useSoftFloat() const {
2782 return Subtarget.useSoftFloat();
2783}
2784
2785void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2786 ArgListTy &Args) const {
2787
2788 // Only relabel X86-32 for C / Stdcall CCs.
2789 if (Subtarget.is64Bit())
2790 return;
2791 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2792 return;
2793 unsigned ParamRegs = 0;
2794 if (auto *M = MF->getFunction().getParent())
2795 ParamRegs = M->getNumberRegisterParameters();
2796
2797 // Mark the first N integer arguments as being passed in registers.
2798 for (auto &Arg : Args) {
2799 Type *T = Arg.Ty;
2800 if (T->isIntOrPtrTy())
2801 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2802 unsigned numRegs = 1;
2803 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2804 numRegs = 2;
2805 if (ParamRegs < numRegs)
2806 return;
2807 ParamRegs -= numRegs;
2808 Arg.IsInReg = true;
2809 }
2810 }
2811}
2812
2813const MCExpr *
2814X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2815 const MachineBasicBlock *MBB,
2816 unsigned uid,MCContext &Ctx) const{
2817 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
2818 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2819 // entries.
2820 return MCSymbolRefExpr::create(MBB->getSymbol(),
2821 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2822}
2823
2824/// Returns relocation base for the given PIC jumptable.
2825SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2826 SelectionDAG &DAG) const {
2827 if (!Subtarget.is64Bit())
2828 // This doesn't have SDLoc associated with it, but is not really the
2829 // same as a Register.
2830 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2831 getPointerTy(DAG.getDataLayout()));
2832 return Table;
2833}
2834
2835/// This returns the relocation base for the given PIC jumptable,
2836/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2837const MCExpr *X86TargetLowering::
2838getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2839 MCContext &Ctx) const {
2840 // X86-64 uses RIP relative addressing based on the jump table label.
2841 if (Subtarget.isPICStyleRIPRel())
2842 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2843
2844 // Otherwise, the reference is relative to the PIC base.
2845 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2846}
2847
2848std::pair<const TargetRegisterClass *, uint8_t>
2849X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2850 MVT VT) const {
2851 const TargetRegisterClass *RRC = nullptr;
2852 uint8_t Cost = 1;
2853 switch (VT.SimpleTy) {
2854 default:
2855 return TargetLowering::findRepresentativeClass(TRI, VT);
2856 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2857 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2858 break;
2859 case MVT::x86mmx:
2860 RRC = &X86::VR64RegClass;
2861 break;
2862 case MVT::f32: case MVT::f64:
2863 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2864 case MVT::v4f32: case MVT::v2f64:
2865 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2866 case MVT::v8f32: case MVT::v4f64:
2867 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2868 case MVT::v16f32: case MVT::v8f64:
2869 RRC = &X86::VR128XRegClass;
2870 break;
2871 }
2872 return std::make_pair(RRC, Cost);
2873}
2874
2875unsigned X86TargetLowering::getAddressSpace() const {
2876 if (Subtarget.is64Bit())
2877 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2878 return 256;
2879}
2880
2881static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2882 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2883 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2884}
2885
2886static Constant* SegmentOffset(IRBuilderBase &IRB,
2887 int Offset, unsigned AddressSpace) {
2888 return ConstantExpr::getIntToPtr(
2889 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2890 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2891}
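// Illustrative: SegmentOffset(IRB, 0x28, 257) builds an inttoptr constant of
// 0x28 to i8* addrspace(257)*; X86 maps address space 257 to %fs and 256 to
// %gs, so this is the %fs:0x28 stack-guard slot on x86-64 glibc.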
2892
2893Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
2894 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2895 // tcbhead_t; use it instead of the usual global variable (see
2896 // sysdeps/{i386,x86_64}/nptl/tls.h)
2897 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2898 if (Subtarget.isTargetFuchsia()) {
2899 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2900 return SegmentOffset(IRB, 0x10, getAddressSpace());
2901 } else {
2902 unsigned AddressSpace = getAddressSpace();
2903 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
2904 // Note that some users may customize the base register and offset.
2905 int Offset = M->getStackProtectorGuardOffset();
2906 // If no -stack-protector-guard-offset value was set, the default is
2907 // %fs:0x28, unless we're using a Kernel code model, in which case it's
2908 // %gs:0x28. On i386 it is %gs:0x14.
2909 if (Offset == INT_MAX)
2910 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2911
2912 StringRef GuardReg = M->getStackProtectorGuardReg();
2913 if (GuardReg == "fs")
2914 AddressSpace = X86AS::FS;
2915 else if (GuardReg == "gs")
2916 AddressSpace = X86AS::GS;
2917
2918 // Use the guard symbol if the user specified one.
2919 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
2920 if (!GuardSymb.empty()) {
2921 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
2922 if (!GV) {
2923 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
2924 : Type::getInt32Ty(M->getContext());
2925 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
2926 nullptr, GuardSymb, nullptr,
2927 GlobalValue::NotThreadLocal, AddressSpace);
2928 }
2929 return GV;
2930 }
2931
2932 return SegmentOffset(IRB, Offset, AddressSpace);
2933 }
2934 }
2935 return TargetLowering::getIRStackGuard(IRB);
2936}
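// Defaults chosen above, for reference: %fs:0x28 on x86-64 glibc/bionic,
// %gs:0x14 on i386, and %fs:0x10 on Fuchsia; the module-level stack-protector
// guard register/offset/symbol settings (e.g. clang's -mstack-protector-guard-*
// options) override these defaults.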
2937
2938void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2939 // MSVC CRT provides functionalities for stack protection.
2940 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2941 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2942 // MSVC CRT has a global variable holding security cookie.
2943 M.getOrInsertGlobal("__security_cookie",
2944 Type::getInt8PtrTy(M.getContext()));
2945
2946 // MSVC CRT has a function to validate security cookie.
2947 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2948 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2949 Type::getInt8PtrTy(M.getContext()));
2950 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2951 F->setCallingConv(CallingConv::X86_FastCall);
2952 F->addParamAttr(0, Attribute::AttrKind::InReg);
2953 }
2954 return;
2955 }
2956
2957 StringRef GuardMode = M.getStackProtectorGuard();
2958
2959 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2960 if ((GuardMode == "tls" || GuardMode.empty()) &&
2961 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2962 return;
2963 TargetLowering::insertSSPDeclarations(M);
2964}
2965
2966Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2967 // MSVC CRT has a global variable holding security cookie.
2968 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2969 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2970 return M.getGlobalVariable("__security_cookie");
2971 }
2972 return TargetLowering::getSDagStackGuard(M);
2973}
2974
2975Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2976 // MSVC CRT has a function to validate security cookie.
2977 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2978 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2979 return M.getFunction("__security_check_cookie");
2980 }
2981 return TargetLowering::getSSPStackGuardCheck(M);
2982}
2983
2984Value *
2985X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
2986 if (Subtarget.getTargetTriple().isOSContiki())
2987 return getDefaultSafeStackPointerLocation(IRB, false);
2988
2989 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2990 // definition of TLS_SLOT_SAFESTACK in
2991 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2992 if (Subtarget.isTargetAndroid()) {
2993 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2994 // %gs:0x24 on i386
2995 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2996 return SegmentOffset(IRB, Offset, getAddressSpace());
2997 }
2998
2999 // Fuchsia is similar.
3000 if (Subtarget.isTargetFuchsia()) {
3001 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
3002 return SegmentOffset(IRB, 0x18, getAddressSpace());
3003 }
3004
3005 return TargetLowering::getSafeStackPointerLocation(IRB);
3006}
3007
3008//===----------------------------------------------------------------------===//
3009// Return Value Calling Convention Implementation
3010//===----------------------------------------------------------------------===//
3011
3012bool X86TargetLowering::CanLowerReturn(
3013 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3014 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3015 SmallVector<CCValAssign, 16> RVLocs;
3016 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3017 return CCInfo.CheckReturn(Outs, RetCC_X86);
3018}
3019
3020const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
3021 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
3022 return ScratchRegs;
3023}
3024
3025 /// Lowers mask values (v*i1) to the local register values.
3026 /// \returns the DAG node after lowering to the register type.
3027static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
3028 const SDLoc &Dl, SelectionDAG &DAG) {
3029 EVT ValVT = ValArg.getValueType();
3030
3031 if (ValVT == MVT::v1i1)
3032 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
3033 DAG.getIntPtrConstant(0, Dl));
3034
3035 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
3036 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
3037 // Two stage lowering might be required
3038 // bitcast: v8i1 -> i8 / v16i1 -> i16
3039 // anyextend: i8 -> i32 / i16 -> i32
3040 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
3041 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
3042 if (ValLoc == MVT::i32)
3043 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
3044 return ValToCopy;
3045 }
3046
3047 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
3048 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
3049 // One stage lowering is required
3050 // bitcast: v32i1 -> i32 / v64i1 -> i64
3051 return DAG.getBitcast(ValLoc, ValArg);
3052 }
3053
3054 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
3055}
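// Example of the two-stage path: a v8i1 mask destined for an i32 location
// becomes (i32 (any_extend (i8 (bitcast v8i1 %mask)))); a v32i1 destined for
// an i32 location is a single bitcast.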
3056
3057 /// Breaks a v64i1 value into two registers and adds the new node to the DAG.
3058static void Passv64i1ArgInRegs(
3059 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
3060 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
3061 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
3062 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
3063 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
3064 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
3065 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
3066 "The value should reside in two registers");
3067
3068 // Before splitting the value we cast it to i64
3069 Arg = DAG.getBitcast(MVT::i64, Arg);
3070
3071 // Splitting the value into two i32 types
3072 SDValue Lo, Hi;
3073 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
3074 DAG.getConstant(0, Dl, MVT::i32));
3075 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
3076 DAG.getConstant(1, Dl, MVT::i32));
3077
3078 // Attach the two i32 types into corresponding registers
3079 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
3080 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
3081}
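// Net effect (32-bit AVX512BW targets only): the v64i1 argument is bitcast to
// i64, split into its low and high i32 halves, and each half is attached to
// the 32-bit GPR assigned by VA and NextVA respectively.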
3082
3083SDValue
3084X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3085 bool isVarArg,
3086 const SmallVectorImpl<ISD::OutputArg> &Outs,
3087 const SmallVectorImpl<SDValue> &OutVals,
3088 const SDLoc &dl, SelectionDAG &DAG) const {
3089 MachineFunction &MF = DAG.getMachineFunction();
3090 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3091
3092 // In some cases we need to disable registers from the default CSR list.
3093 // For example, when they are used for argument passing.
3094 bool ShouldDisableCalleeSavedRegister =
3095 CallConv == CallingConv::X86_RegCall ||
3096 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
3097
3098 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
3099 report_fatal_error("X86 interrupts may not return any value");
3100
3101 SmallVector<CCValAssign, 16> RVLocs;
3102 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
3103 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
3104
3105 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
3106 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
3107 ++I, ++OutsIndex) {
3108 CCValAssign &VA = RVLocs[I];
3109 assert(VA.isRegLoc() && "Can only return in registers!");
3110
3111 // Add the register to the CalleeSaveDisableRegs list.
3112 if (ShouldDisableCalleeSavedRegister)
3113 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
3114
3115 SDValue ValToCopy = OutVals[OutsIndex];
3116 EVT ValVT = ValToCopy.getValueType();
3117
3118 // Promote values to the appropriate types.
3119 if (VA.getLocInfo() == CCValAssign::SExt)
3120 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
3121 else if (VA.getLocInfo() == CCValAssign::ZExt)
3122 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
3123 else if (VA.getLocInfo() == CCValAssign::AExt) {
3124 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
3125 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
3126 else
3127 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
3128 }
3129 else if (VA.getLocInfo() == CCValAssign::BCvt)
3130 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
3131
3132 assert(VA.getLocInfo() != CCValAssign::FPExt &&
3133 "Unexpected FP-extend for return value.");
3134
3135 // Report an error if we have attempted to return a value via an XMM
3136 // register and SSE was disabled.
3137 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3138 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3139 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3140 } else if (!Subtarget.hasSSE2() &&
3141 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3142 ValVT == MVT::f64) {
3143 // When returning a double via an XMM register, report an error if SSE2 is
3144 // not enabled.
3145 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3146 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3147 }
3148
3149 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
3150 // the RET instruction and handled by the FP Stackifier.
3151 if (VA.getLocReg() == X86::FP0 ||
3152 VA.getLocReg() == X86::FP1) {
3153 // If this is a copy from an xmm register to ST(0), use an FPExtend to
3154 // change the value to the FP stack register class.
3155 if (isScalarFPTypeInSSEReg(VA.getValVT()))
3156 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
3157 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3158 // Don't emit a copytoreg.
3159 continue;
3160 }
3161
3162 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
3163 // which is returned in RAX / RDX.
3164 if (Subtarget.is64Bit()) {
3165 if (ValVT == MVT::x86mmx) {
3166 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
3167 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
3168 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
3169 ValToCopy);
3170 // If we don't have SSE2 available, convert to v4f32 so the generated
3171 // register is legal.
3172 if (!Subtarget.hasSSE2())
3173 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
3174 }
3175 }
3176 }
3177
3178 if (VA.needsCustom()) {
3179 assert(VA.getValVT() == MVT::v64i1 &&
3180 "Currently the only custom case is when we split v64i1 to 2 regs");
3181
3182 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
3183 Subtarget);
3184
3185 // Add the second register to the CalleeSaveDisableRegs list.
3186 if (ShouldDisableCalleeSavedRegister)
3187 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
3188 } else {
3189 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3190 }
3191 }
3192
3193 SDValue Flag;
3194 SmallVector<SDValue, 6> RetOps;
3195 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3196 // Operand #1 = Bytes To Pop
3197 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
3198 MVT::i32));
3199
3200 // Copy the result values into the output registers.
3201 for (auto &RetVal : RetVals) {
3202 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
3203 RetOps.push_back(RetVal.second);
3204 continue; // Don't emit a copytoreg.
3205 }
3206
3207 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
3208 Flag = Chain.getValue(1);
3209 RetOps.push_back(
3210 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
3211 }
3212
3213 // Swift calling convention does not require we copy the sret argument
3214 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
3215
3216 // All x86 ABIs require that for returning structs by value we copy
3217 // the sret argument into %rax/%eax (depending on ABI) for the return.
3218 // We saved the argument into a virtual register in the entry block,
3219 // so now we copy the value out and into %rax/%eax.
3220 //
3221 // Checking Function.hasStructRetAttr() here is insufficient because the IR
3222 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
3223 // false, then an sret argument may be implicitly inserted in the SelDAG. In
3224 // either case FuncInfo->setSRetReturnReg() will have been called.
3225 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3226 // When we have both sret and another return value, we should use the
3227 // original Chain stored in RetOps[0], instead of the current Chain updated
3228 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
3229
3230 // For the case of sret and another return value, we have
3231 // Chain_0 at the function entry
3232 // Chain_1 = getCopyToReg(Chain_0) in the above loop
3233 // If we use Chain_1 in getCopyFromReg, we will have
3234 // Val = getCopyFromReg(Chain_1)
3235 // Chain_2 = getCopyToReg(Chain_1, Val) from below
3236
3237 // getCopyToReg(Chain_0) will be glued together with
3238 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
3239 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
3240 // Data dependency from Unit B to Unit A due to usage of Val in
3241 // getCopyToReg(Chain_1, Val)
3242 // Chain dependency from Unit A to Unit B
3243
3244 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
3245 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
3246 getPointerTy(MF.getDataLayout()));
3247
3248 Register RetValReg
3249 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
3250 X86::RAX : X86::EAX;
3251 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
3252 Flag = Chain.getValue(1);
3253
3254 // RAX/EAX now acts like a return value.
3255 RetOps.push_back(
3256 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
3257
3258 // Add the returned register to the CalleeSaveDisableRegs list.
3259 if (ShouldDisableCalleeSavedRegister)
3260 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
3261 }
3262
3263 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3264 const MCPhysReg *I =
3265 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3266 if (I) {
3267 for (; *I; ++I) {
3268 if (X86::GR64RegClass.contains(*I))
3269 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3270 else
3271 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3272 }
3273 }
3274
3275 RetOps[0] = Chain; // Update chain.
3276
3277 // Add the flag if we have it.
3278 if (Flag.getNode())
3279 RetOps.push_back(Flag);
3280
3281 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
3282 if (CallConv == CallingConv::X86_INTR)
3283 opcode = X86ISD::IRET;
3284 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
3285}
3286
3287bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3288 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
3289 return false;
3290
3291 SDValue TCChain = Chain;
3292 SDNode *Copy = *N->use_begin();
3293 if (Copy->getOpcode() == ISD::CopyToReg) {
3294 // If the copy has a glue operand, we conservatively assume it isn't safe to
3295 // perform a tail call.
3296 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3297 return false;
3298 TCChain = Copy->getOperand(0);
3299 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
3300 return false;
3301
3302 bool HasRet = false;
3303 for (const SDNode *U : Copy->uses()) {
3304 if (U->getOpcode() != X86ISD::RET_FLAG)
3305 return false;
3306 // If we are returning more than one value, we can definitely
3307 // not make a tail call; see PR19530.
3308 if (U->getNumOperands() > 4)
3309 return false;
3310 if (U->getNumOperands() == 4 &&
3311 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
3312 return false;
3313 HasRet = true;
3314 }
3315
3316 if (!HasRet)
3317 return false;
3318
3319 Chain = TCChain;
3320 return true;
3321}
3322
3323EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
3324 ISD::NodeType ExtendKind) const {
3325 MVT ReturnMVT = MVT::i32;
3326
3327 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
3328 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
3329 // The ABI does not require i1, i8 or i16 to be extended.
3330 //
3331 // On Darwin, there is code in the wild relying on Clang's old behaviour of
3332 // always extending i8/i16 return values, so keep doing that for now.
3333 // (PR26665).
3334 ReturnMVT = MVT::i8;
3335 }
3336
3337 EVT MinVT = getRegisterType(Context, ReturnMVT);
3338 return VT.bitsLT(MinVT) ? MinVT : VT;
3339}
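// Example: an i8 return on non-Darwin targets keeps MinVT at i8, so no
// extension is required; on Darwin MinVT stays i32 and i8/i16 returns are
// still widened to i32 (see PR26665).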
3340
3341 /// Reads two 32-bit registers and creates a 64-bit mask value.
3342 /// \param VA The current 32-bit value that needs to be assigned.
3343 /// \param NextVA The next 32-bit value that needs to be assigned.
3344 /// \param Root The parent DAG node.
3345 /// \param [in,out] InFlag Represents the SDValue in the parent DAG node for
3346 /// glue purposes. In case the DAG is already using a
3347 /// physical register instead of a virtual one, we should
3348 /// glue our new SDValue to the InFlag SDValue.
3349 /// \return a new 64-bit SDValue.
3350static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
3351 SDValue &Root, SelectionDAG &DAG,
3352 const SDLoc &Dl, const X86Subtarget &Subtarget,
3353 SDValue *InFlag = nullptr) {
3354 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
3355 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
3356 assert(VA.getValVT() == MVT::v64i1 &&
3357 "Expecting first location of 64 bit width type");
3358 assert(NextVA.getValVT() == VA.getValVT() &&
3359 "The locations should have the same type");
3360 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
3361 "The values should reside in two registers");
3362
3363 SDValue Lo, Hi;
3364 SDValue ArgValueLo, ArgValueHi;
3365
3366 MachineFunction &MF = DAG.getMachineFunction();
3367 const TargetRegisterClass *RC = &X86::GR32RegClass;
3368
3369 // Read a 32 bit value from the registers.
3370 if (nullptr == InFlag) {
3371 // When no physical register is present,
3372 // create an intermediate virtual register.
3373 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3374 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3375 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3376 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3377 } else {
3378 // When a physical register is available read the value from it and glue
3379 // the reads together.
3380 ArgValueLo =
3381 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
3382 *InFlag = ArgValueLo.getValue(2);
3383 ArgValueHi =
3384 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
3385 *InFlag = ArgValueHi.getValue(2);
3386 }
3387
3388 // Convert the i32 type into v32i1 type.
3389 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
3390
3391 // Convert the i32 type into v32i1 type.
3392 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
3393
3394 // Concatenate the two values together.
3395 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
3396}
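// Net effect: v64i1 = concat_vectors(bitcast v32i1 %lo32, bitcast v32i1 %hi32),
// where the two i32 halves are read from the registers assigned to VA and
// NextVA (with glued reads when copying from physical registers).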
3397
3398 /// Lowers a register of various sizes (8/16/32/64 bits)
3399 /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
3400 /// \returns a DAG node containing the operand after lowering to the mask type.
3401static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
3402 const EVT &ValLoc, const SDLoc &Dl,
3403 SelectionDAG &DAG) {
3404 SDValue ValReturned = ValArg;
3405
3406 if (ValVT == MVT::v1i1)
3407 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3408
3409 if (ValVT == MVT::v64i1) {
3410 // On 32-bit targets, this case is handled by getv64i1Argument.
3411 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
3412 // On 64-bit targets, there is no need to truncate the value; just bitcast it.
3413 } else {
3414 MVT maskLen;
3415 switch (ValVT.getSimpleVT().SimpleTy) {
3416 case MVT::v8i1:
3417 maskLen = MVT::i8;
3418 break;
3419 case MVT::v16i1:
3420 maskLen = MVT::i16;
3421 break;
3422 case MVT::v32i1:
3423 maskLen = MVT::i32;
3424 break;
3425 default:
3426 llvm_unreachable("Expecting a vector of i1 types");
3427 }
3428
3429 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3430 }
3431 return DAG.getBitcast(ValVT, ValReturned);
3432}
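// Example: a v16i1 value returned in an i32 location becomes
// (v16i1 (bitcast (i16 (truncate %val)))); a v64i1 in an i64 location is a
// plain bitcast.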
3433
3434/// Lower the result values of a call into the
3435/// appropriate copies out of appropriate physical registers.
3436///
3437SDValue X86TargetLowering::LowerCallResult(
3438 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3439 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3440 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3441 uint32_t *RegMask) const {
3442
3443 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3444 // Assign locations to each value returned by this call.
3445 SmallVector<CCValAssign, 16> RVLocs;
3446 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3447 *DAG.getContext());
3448 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3449
3450 // Copy all of the result registers out of their specified physreg.
3451 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3452 ++I, ++InsIndex) {
3453 CCValAssign &VA = RVLocs[I];
3454 EVT CopyVT = VA.getLocVT();
3455
3456 // In some calling conventions we need to remove the used registers
3457 // from the register mask.
3458 if (RegMask) {
3459 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3460 SubRegs.isValid(); ++SubRegs)
3461 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3462 }
3463
3464 // Report an error if there was an attempt to return FP values via XMM
3465 // registers.
3466 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3467 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3468 if (VA.getLocReg() == X86::XMM1)
3469 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3470 else
3471 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3472 } else if (!Subtarget.hasSSE2() &&
3473 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3474 CopyVT == MVT::f64) {
3475 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3476 if (VA.getLocReg() == X86::XMM1)
3477 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3478 else
3479 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3480 }
3481
3482 // If we prefer to use the value in xmm registers, copy it out as f80 and
3483 // use a truncate to move it from fp stack reg to xmm reg.
3484 bool RoundAfterCopy = false;
3485 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3486 isScalarFPTypeInSSEReg(VA.getValVT())) {
3487 if (!Subtarget.hasX87())
3488 report_fatal_error("X87 register return with X87 disabled");
3489 CopyVT = MVT::f80;
3490 RoundAfterCopy = (CopyVT != VA.getLocVT());
3491 }
3492
3493 SDValue Val;
3494 if (VA.needsCustom()) {
3495 assert(VA.getValVT() == MVT::v64i1 &&
3496 "Currently the only custom case is when we split v64i1 to 2 regs");
3497 Val =
3498 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3499 } else {
3500 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3501 .getValue(1);
3502 Val = Chain.getValue(0);
3503 InFlag = Chain.getValue(2);
3504 }
3505
3506 if (RoundAfterCopy)
3507 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3508 // This truncation won't change the value.
3509 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
3510
3511 if (VA.isExtInLoc()) {
3512 if (VA.getValVT().isVector() &&
3513 VA.getValVT().getScalarType() == MVT::i1 &&
3514 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3515 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3516 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3517 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3518 } else
3519 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3520 }
3521
3522 if (VA.getLocInfo() == CCValAssign::BCvt)
3523 Val = DAG.getBitcast(VA.getValVT(), Val);
3524
3525 InVals.push_back(Val);
3526 }
3527
3528 return Chain;
3529}
3530
3531//===----------------------------------------------------------------------===//
3532// C & StdCall & Fast Calling Convention implementation
3533//===----------------------------------------------------------------------===//
3534 // The StdCall calling convention is standard for many Windows API
3535 // routines. It differs from the C calling convention only slightly: the
3536 // callee cleans up the stack rather than the caller, and symbols are
3537 // decorated (name-mangled) in a specific way. It doesn't support any vector arguments.
3538// For info on fast calling convention see Fast Calling Convention (tail call)
3539// implementation LowerX86_32FastCCCallTo.
3540
3541/// Determines whether Args, either a set of outgoing arguments to a call, or a
3542/// set of incoming args of a call, contains an sret pointer that the callee
3543/// pops
3544template <typename T>
3545static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
3546 const X86Subtarget &Subtarget) {
3547 // Not C++20 (yet), so no concepts available.
3548 static_assert(std::is_same<T, ISD::OutputArg>::value ||
3549 std::is_same<T, ISD::InputArg>::value,
3550 "requires ISD::OutputArg or ISD::InputArg");
3551
3552 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
3553 // for most compilations.
3554 if (!Subtarget.is32Bit())
3555 return false;
3556
3557 if (Args.empty())
3558 return false;
3559
3560 // Most calls do not have an sret argument, check the arg next.
3561 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
3562 if (!Flags.isSRet() || Flags.isInReg())
3563 return false;
3564
3565 // The MSVC ABI does not pop the sret.
3566 if (Subtarget.getTargetTriple().isOSMSVCRT())
3567 return false;
3568
3569 // MCUs don't pop the sret
3570 if (Subtarget.isTargetMCU())
3571 return false;
3572
3573 // Callee pops argument
3574 return true;
3575}
3576
3577/// Make a copy of an aggregate at address specified by "Src" to address
3578/// "Dst" with size and alignment information specified by the specific
3579/// parameter attribute. The copy will be passed as a byval function parameter.
3580static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3581 SDValue Chain, ISD::ArgFlagsTy Flags,
3582 SelectionDAG &DAG, const SDLoc &dl) {
3583 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3584
3585 return DAG.getMemcpy(
3586 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3587 /*isVolatile*/ false, /*AlwaysInline=*/true,
3588 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3589}
3590
3591/// Return true if the calling convention is one that we can guarantee TCO for.
3592static bool canGuaranteeTCO(CallingConv::ID CC) {
3593 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3594 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3595 CC == CallingConv::HHVM || CC == CallingConv::Tail ||
3596 CC == CallingConv::SwiftTail);
3597}
3598
3599/// Return true if we might ever do TCO for calls with this calling convention.
3600static bool mayTailCallThisCC(CallingConv::ID CC) {
3601 switch (CC) {
3602 // C calling conventions:
3603 case CallingConv::C:
3604 case CallingConv::Win64:
3605 case CallingConv::X86_64_SysV:
3606 // Callee pop conventions:
3607 case CallingConv::X86_ThisCall:
3608 case CallingConv::X86_StdCall:
3609 case CallingConv::X86_VectorCall:
3610 case CallingConv::X86_FastCall:
3611 // Swift:
3612 case CallingConv::Swift:
3613 return true;
3614 default:
3615 return canGuaranteeTCO(CC);
3616 }
3617}
3618
3619/// Return true if the function is being made into a tailcall target by
3620/// changing its ABI.
3621static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3622 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
3623 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
3624}
3625
3626bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3627 if (!CI->isTailCall())
3628 return false;
3629
3630 CallingConv::ID CalleeCC = CI->getCallingConv();
3631 if (!mayTailCallThisCC(CalleeCC))
3632 return false;
3633
3634 return true;
3635}
3636
3637SDValue
3638X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3639 const SmallVectorImpl<ISD::InputArg> &Ins,
3640 const SDLoc &dl, SelectionDAG &DAG,
3641 const CCValAssign &VA,
3642 MachineFrameInfo &MFI, unsigned i) const {
3643 // Create the nodes corresponding to a load from this parameter slot.
3644 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3645 bool AlwaysUseMutable = shouldGuaranteeTCO(
3646 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3647 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3648 EVT ValVT;
3649 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3650
3651 // If value is passed by pointer we have address passed instead of the value
3652 // itself. No need to extend if the mask value and location share the same
3653 // absolute size.
3654 bool ExtendedInMem =
3655 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3656 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3657
3658 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3659 ValVT = VA.getLocVT();
3660 else
3661 ValVT = VA.getValVT();
3662
3663 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3664 // changed with more analysis.
3665 // In case of tail call optimization, mark all arguments mutable, since they
3666 // could be overwritten by the lowering of arguments in case of a tail call.
3667 if (Flags.isByVal()) {
3668 unsigned Bytes = Flags.getByValSize();
3669 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3670
3671 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3672 // can be improved with deeper analysis.
3673 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3674 /*isAliased=*/true);
3675 return DAG.getFrameIndex(FI, PtrVT);
3676 }
3677
3678 EVT ArgVT = Ins[i].ArgVT;
3679
3680 // If this is a vector that has been split into multiple parts, and the
3681 // scalar size of the parts doesn't match the vector element size, then we can't
3682 // elide the copy. The parts will have padding between them instead of being
3683 // packed like a vector.
3684 bool ScalarizedAndExtendedVector =
3685 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3686 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3687
3688 // This is an argument in memory. We might be able to perform copy elision.
3689 // If the argument is passed directly in memory without any extension, then we
3690 // can perform copy elision. Large vector types, for example, may be passed
3691 // indirectly by pointer.
3692 if (Flags.isCopyElisionCandidate() &&
3693 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3694 !ScalarizedAndExtendedVector) {
3695 SDValue PartAddr;
3696 if (Ins[i].PartOffset == 0) {
3697 // If this is a one-part value or the first part of a multi-part value,
3698 // create a stack object for the entire argument value type and return a
3699 // load from our portion of it. This assumes that if the first part of an
3700 // argument is in memory, the rest will also be in memory.
3701 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3702 /*IsImmutable=*/false);
3703 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3704 return DAG.getLoad(
3705 ValVT, dl, Chain, PartAddr,
3706 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3707 } else {
3708 // This is not the first piece of an argument in memory. See if there is
3709 // already a fixed stack object including this offset. If so, assume it
3710 // was created by the PartOffset == 0 branch above and create a load from
3711 // the appropriate offset into it.
3712 int64_t PartBegin = VA.getLocMemOffset();
3713 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3714 int FI = MFI.getObjectIndexBegin();
3715 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3716 int64_t ObjBegin = MFI.getObjectOffset(FI);
3717 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3718 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3719 break;
3720 }
3721 if (MFI.isFixedObjectIndex(FI)) {
3722 SDValue Addr =
3723 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3724 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3725 return DAG.getLoad(
3726 ValVT, dl, Chain, Addr,
3727 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3728 Ins[i].PartOffset));
3729 }
3730 }
3731 }
3732
3733 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3734 VA.getLocMemOffset(), isImmutable);
3735
3736 // Set SExt or ZExt flag.
3737 if (VA.getLocInfo() == CCValAssign::ZExt) {
3738 MFI.setObjectZExt(FI, true);
3739 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3740 MFI.setObjectSExt(FI, true);
3741 }
3742
3743 MaybeAlign Alignment;
3744 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
3745 ValVT != MVT::f80)
3746 Alignment = MaybeAlign(4);
3747 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3748 SDValue Val = DAG.getLoad(
3749 ValVT, dl, Chain, FIN,
3750 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3751 Alignment);
3752 return ExtendedInMem
3753 ? (VA.getValVT().isVector()
3754 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3755 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3756 : Val;
3757}
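As a side note, the PartOffset != 0 branch above searches the existing fixed stack objects for one whose byte range fully contains the part being loaded. Below is a standalone sketch of that search using a plain vector of (offset, size) pairs instead of MachineFrameInfo; the names are illustrative, not LLVM API:

// Self-contained sketch of the fixed-object containment search, not LLVM code.
#include <cstdint>
#include <cstdio>
#include <vector>

struct FixedObject { int64_t Offset; int64_t Size; };

// Returns the index of the first object containing [PartBegin, PartEnd), or -1.
static int findContainingObject(const std::vector<FixedObject> &Objects,
                                int64_t PartBegin, int64_t PartEnd) {
  for (size_t FI = 0; FI < Objects.size(); ++FI) {
    int64_t ObjBegin = Objects[FI].Offset;
    int64_t ObjEnd = ObjBegin + Objects[FI].Size;
    if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
      return static_cast<int>(FI);
  }
  return -1;
}

int main() {
  // A 16-byte argument object at stack offset 8, created for PartOffset == 0.
  std::vector<FixedObject> Objects = {{0, 8}, {8, 16}};
  // The second 8-byte part of that argument occupies offsets [16, 24).
  std::printf("%d\n", findContainingObject(Objects, 16, 24)); // prints 1
  std::printf("%d\n", findContainingObject(Objects, 24, 32)); // prints -1
}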
3758
3759// FIXME: Get this from tablegen.
3760static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3761 const X86Subtarget &Subtarget) {
3762 assert(Subtarget.is64Bit());
3763
3764 if (Subtarget.isCallingConvWin64(CallConv)) {
3765 static const MCPhysReg GPR64ArgRegsWin64[] = {
3766 X86::RCX, X86::RDX, X86::R8, X86::R9
3767 };
3768 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3769 }
3770
3771 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3772 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3773 };
3774 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3775}
3776
3777// FIXME: Get this from tablegen.
3778static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3779 CallingConv::ID CallConv,
3780 const X86Subtarget &Subtarget) {
3781 assert(Subtarget.is64Bit());
3782 if (Subtarget.isCallingConvWin64(CallConv)) {
3783 // The XMM registers which might contain var arg parameters are shadowed
3784 // in their paired GPR. So we only need to save the GPR to their home
3785 // slots.
3786 // TODO: __vectorcall will change this.
3787 return None;
3788 }
3789
3790 bool isSoftFloat = Subtarget.useSoftFloat();
3791 if (isSoftFloat || !Subtarget.hasSSE1())
3792 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3793 // registers.
3794 return None;
3795
3796 static const MCPhysReg XMMArgRegs64Bit[] = {
3797 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3798 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3799 };
3800 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3801}
3802
3803#ifndef NDEBUG
3804static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3805 return llvm::is_sorted(
3806 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3807 return A.getValNo() < B.getValNo();
3808 });
3809}
3810#endif
3811
3812namespace {
3813/// This is a helper class for lowering variable arguments parameters.
3814class VarArgsLoweringHelper {
3815public:
3816 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3817 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3818 CallingConv::ID CallConv, CCState &CCInfo)
3819 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3820 TheMachineFunction(DAG.getMachineFunction()),
3821 TheFunction(TheMachineFunction.getFunction()),
3822 FrameInfo(TheMachineFunction.getFrameInfo()),
3823 FrameLowering(*Subtarget.getFrameLowering()),
3824 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3825 CCInfo(CCInfo) {}
3826
3827 // Lower variable arguments parameters.
3828 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3829
3830private:
3831 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3832
3833 void forwardMustTailParameters(SDValue &Chain);
3834
3835 bool is64Bit() const { return Subtarget.is64Bit(); }
3836 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3837
3838 X86MachineFunctionInfo *FuncInfo;
3839 const SDLoc &DL;
3840 SelectionDAG &DAG;
3841 const X86Subtarget &Subtarget;
3842 MachineFunction &TheMachineFunction;
3843 const Function &TheFunction;
3844 MachineFrameInfo &FrameInfo;
3845 const TargetFrameLowering &FrameLowering;
3846 const TargetLowering &TargLowering;
3847 CallingConv::ID CallConv;
3848 CCState &CCInfo;
3849};
3850} // namespace
3851
3852void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3853 SDValue &Chain, unsigned StackSize) {
3854 // If the function takes variable number of arguments, make a frame index for
3855 // the start of the first vararg value... for expansion of llvm.va_start. We
3856 // can skip this if there are no va_start calls.
3857 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3858 CallConv != CallingConv::X86_ThisCall)) {
3859 FuncInfo->setVarArgsFrameIndex(
3860 FrameInfo.CreateFixedObject(1, StackSize, true));
3861 }
3862
3863 // 64-bit calling conventions support varargs and register parameters, so we
3864 // have to do extra work to spill them in the prologue.
3865 if (is64Bit()) {
3866 // Find the first unallocated argument registers.
3867 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3868 ArrayRef<MCPhysReg> ArgXMMs =
3869 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3870 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3871 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3872
3873 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3874 "SSE register cannot be used when SSE is disabled!");
3875
3876 if (isWin64()) {
3877 // Get to the caller-allocated home save location. Add 8 to account
3878 // for the return address.
3879 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3880 FuncInfo->setRegSaveFrameIndex(
3881 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3882 // Fixup to set vararg frame on shadow area (4 x i64).
3883 if (NumIntRegs < 4)
3884 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3885 } else {
3886 // For X86-64, if there are vararg parameters that are passed via
3887 // registers, then we must store them to their spots on the stack so
3888 // they may be loaded by dereferencing the result of va_next.
3889 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3890 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3891 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3892 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3893 }
3894
3895 SmallVector<SDValue, 6>
3896 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3897 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3898 // keeping live input value
3899 SDValue ALVal; // if applicable keeps SDValue for %al register
3900
3901 // Gather all the live in physical registers.
3902 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3903 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3904 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3905 }
3906 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3907 if (!AvailableXmms.empty()) {
3908 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3909 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3910 for (MCPhysReg Reg : AvailableXmms) {
3911 // FastRegisterAllocator spills virtual registers at basic
3912 // block boundary. That leads to usages of xmm registers
3913 // outside of check for %al. Pass physical registers to
3914 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
3915 TheMachineFunction.getRegInfo().addLiveIn(Reg);
3916 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
3917 }
3918 }
3919
3920 // Store the integer parameter registers.
3921 SmallVector<SDValue, 8> MemOps;
3922 SDValue RSFIN =
3923 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3924 TargLowering.getPointerTy(DAG.getDataLayout()));
3925 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3926 for (SDValue Val : LiveGPRs) {
3927 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3928 TargLowering.getPointerTy(DAG.getDataLayout()),
3929 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3930 SDValue Store =
3931 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3932 MachinePointerInfo::getFixedStack(
3933 DAG.getMachineFunction(),
3934 FuncInfo->getRegSaveFrameIndex(), Offset));
3935 MemOps.push_back(Store);
3936 Offset += 8;
3937 }
3938
3939 // Now store the XMM (fp + vector) parameter registers.
3940 if (!LiveXMMRegs.empty()) {
3941 SmallVector<SDValue, 12> SaveXMMOps;
3942 SaveXMMOps.push_back(Chain);
3943 SaveXMMOps.push_back(ALVal);
3944 SaveXMMOps.push_back(RSFIN);
3945 SaveXMMOps.push_back(
3946 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
3947 llvm::append_range(SaveXMMOps, LiveXMMRegs);
3948 MachineMemOperand *StoreMMO =
3949 DAG.getMachineFunction().getMachineMemOperand(
3950 MachinePointerInfo::getFixedStack(
3951 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
3952 Offset),
3953 MachineMemOperand::MOStore, 128, Align(16));
3954 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
3955 DL, DAG.getVTList(MVT::Other),
3956 SaveXMMOps, MVT::i8, StoreMMO));
3957 }
3958
3959 if (!MemOps.empty())
3960 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3961 }
3962}
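For reference, a minimal standalone sketch of the SysV x86-64 register save area bookkeeping performed above (48 bytes of GPR spill slots followed by 128 bytes of XMM spill slots); the struct and function names are placeholders, not LLVM code:

// Illustrative sketch of the varargs register save area layout, not LLVM code.
#include <cstdio>

struct RegSaveArea {
  unsigned GPOffset;  // where va_arg starts reading spilled GPRs
  unsigned FPOffset;  // where va_arg starts reading spilled XMM registers
  unsigned TotalSize; // size of the stack object holding all spills
};

static RegSaveArea computeRegSaveArea(unsigned NumIntRegsUsed,
                                      unsigned NumXMMRegsUsed) {
  const unsigned NumGPRs = 6, NumXMMs = 8;
  RegSaveArea A;
  A.GPOffset = NumIntRegsUsed * 8;
  A.FPOffset = NumGPRs * 8 + NumXMMRegsUsed * 16;
  A.TotalSize = NumGPRs * 8 + NumXMMs * 16; // 48 + 128 = 176 bytes
  return A;
}

int main() {
  // A variadic function that already consumed 2 integer and 1 XMM register.
  RegSaveArea A = computeRegSaveArea(2, 1);
  std::printf("gp=%u fp=%u size=%u\n", A.GPOffset, A.FPOffset, A.TotalSize);
  // Prints: gp=16 fp=64 size=176
}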
3963
3964void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3965 // Find the largest legal vector type.
3966 MVT VecVT = MVT::Other;
3967 // FIXME: Only some x86_32 calling conventions support AVX512.
3968 if (Subtarget.useAVX512Regs() &&
3969 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3970 CallConv == CallingConv::Intel_OCL_BI)))
3971 VecVT = MVT::v16f32;
3972 else if (Subtarget.hasAVX())
3973 VecVT = MVT::v8f32;
3974 else if (Subtarget.hasSSE2())
3975 VecVT = MVT::v4f32;
3976
3977 // We forward some GPRs and some vector types.
3978 SmallVector<MVT, 2> RegParmTypes;
3979 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3980 RegParmTypes.push_back(IntVT);
3981 if (VecVT != MVT::Other)
3982 RegParmTypes.push_back(VecVT);
3983
3984 // Compute the set of forwarded registers. The rest are scratch.
3985 SmallVectorImpl<ForwardedRegister> &Forwards =
3986 FuncInfo->getForwardedMustTailRegParms();
3987 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3988
3989 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3990 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3991 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3992 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3993 }
3994
3995 // Copy all forwards from physical to virtual registers.
3996 for (ForwardedRegister &FR : Forwards) {
3997 // FIXME: Can we use a less constrained schedule?
3998 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3999 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
4000 TargLowering.getRegClassFor(FR.VT));
4001 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
4002 }
4003}
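A small illustrative sketch of the vector-width selection above, with plain bools standing in for the subtarget feature queries (assumed names, not LLVM API):

// Sketch of choosing the widest vector register class to forward, not LLVM code.
#include <cstdio>

enum class VecWidth { None, V128, V256, V512 };

static VecWidth pickForwardedVectorWidth(bool UseAVX512Regs, bool Is64Bit,
                                         bool CCAllowsAVX512On32Bit,
                                         bool HasAVX, bool HasSSE2) {
  if (UseAVX512Regs && (Is64Bit || CCAllowsAVX512On32Bit))
    return VecWidth::V512; // v16f32
  if (HasAVX)
    return VecWidth::V256; // v8f32
  if (HasSSE2)
    return VecWidth::V128; // v4f32
  return VecWidth::None;   // forward only GPRs
}

int main() {
  // 64-bit AVX2 target: forward 256-bit vector registers plus i64 GPRs.
  std::printf("%d\n", (int)pickForwardedVectorWidth(false, true, false,
                                                    true, true)); // prints 2
}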
4004
4005void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
4006 unsigned StackSize) {
4007 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
4008 // If necessary, it will be set to the correct value later.
4009 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
4010 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4011
4012 if (FrameInfo.hasVAStart())
4013 createVarArgAreaAndStoreRegisters(Chain, StackSize);
4014
4015 if (FrameInfo.hasMustTailInVarArgFunc())
4016 forwardMustTailParameters(Chain);
4017}
4018
4019SDValue X86TargetLowering::LowerFormalArguments(
4020 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4021 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4022 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4023 MachineFunction &MF = DAG.getMachineFunction();
4024 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4025
4026 const Function &F = MF.getFunction();
4027 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
4028 F.getName() == "main")
4029 FuncInfo->setForceFramePointer(true);
4030
4031 MachineFrameInfo &MFI = MF.getFrameInfo();
4032 bool Is64Bit = Subtarget.is64Bit();
4033 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4034
4035 assert(
4036 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
4037 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
4038
4039 // Assign locations to all of the incoming arguments.
4040 SmallVector<CCValAssign, 16> ArgLocs;
4041 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4042
4043 // Allocate shadow area for Win64.
4044 if (IsWin64)
4045 CCInfo.AllocateStack(32, Align(8));
4046
4047 CCInfo.AnalyzeArguments(Ins, CC_X86);
4048
4049 // In vectorcall calling convention a second pass is required for the HVA
4050 // types.
4051 if (CallingConv::X86_VectorCall == CallConv) {
4052 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
4053 }
4054
4055 // The next loop assumes that the locations are in the same order as the
4056 // input arguments.
4057 assert(isSortedByValueNo(ArgLocs) &&
4058 "Argument Location list must be sorted before lowering");
4059
4060 SDValue ArgValue;
4061 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
4062 ++I, ++InsIndex) {
4063 assert(InsIndex < Ins.size() && "Invalid Ins index");
4064 CCValAssign &VA = ArgLocs[I];
4065
4066 if (VA.isRegLoc()) {
4067 EVT RegVT = VA.getLocVT();
4068 if (VA.needsCustom()) {
4069 assert(
4070 VA.getValVT() == MVT::v64i1 &&
4071 "Currently the only custom case is when we split v64i1 to 2 regs");
4072
4073 // v64i1 values, in regcall calling convention, that are
4074 // compiled to 32 bit arch, are split up into two registers.
4075 ArgValue =
4076 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
4077 } else {
4078 const TargetRegisterClass *RC;
4079 if (RegVT == MVT::i8)
4080 RC = &X86::GR8RegClass;
4081 else if (RegVT == MVT::i16)
4082 RC = &X86::GR16RegClass;
4083 else if (RegVT == MVT::i32)
4084 RC = &X86::GR32RegClass;
4085 else if (Is64Bit && RegVT == MVT::i64)
4086 RC = &X86::GR64RegClass;
4087 else if (RegVT == MVT::f16)
4088 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
4089 else if (RegVT == MVT::f32)
4090 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
4091 else if (RegVT == MVT::f64)
4092 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
4093 else if (RegVT == MVT::f80)
4094 RC = &X86::RFP80RegClass;
4095 else if (RegVT == MVT::f128)
4096 RC = &X86::VR128RegClass;
4097 else if (RegVT.is512BitVector())
4098 RC = &X86::VR512RegClass;
4099 else if (RegVT.is256BitVector())
4100 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
4101 else if (RegVT.is128BitVector())
4102 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
4103 else if (RegVT == MVT::x86mmx)
4104 RC = &X86::VR64RegClass;
4105 else if (RegVT == MVT::v1i1)
4106 RC = &X86::VK1RegClass;
4107 else if (RegVT == MVT::v8i1)
4108 RC = &X86::VK8RegClass;
4109 else if (RegVT == MVT::v16i1)
4110 RC = &X86::VK16RegClass;
4111 else if (RegVT == MVT::v32i1)
4112 RC = &X86::VK32RegClass;
4113 else if (RegVT == MVT::v64i1)
4114 RC = &X86::VK64RegClass;
4115 else
4116 llvm_unreachable("Unknown argument type!");
4117
4118 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4119 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4120 }
4121
4122 // If this is an 8 or 16-bit value, it is really passed promoted to 32
4123 // bits. Insert an assert[sz]ext to capture this, then truncate to the
4124 // right size.
4125 if (VA.getLocInfo() == CCValAssign::SExt)
4126 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4127 DAG.getValueType(VA.getValVT()));
4128 else if (VA.getLocInfo() == CCValAssign::ZExt)
4129 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4130 DAG.getValueType(VA.getValVT()));
4131 else if (VA.getLocInfo() == CCValAssign::BCvt)
4132 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
4133
4134 if (VA.isExtInLoc()) {
4135 // Handle MMX values passed in XMM regs.
4136 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
4137 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
4138 else if (VA.getValVT().isVector() &&
4139 VA.getValVT().getScalarType() == MVT::i1 &&
4140 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
4141 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
4142 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
4143 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
4144 } else
4145 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4146 }
4147 } else {
4148 assert(VA.isMemLoc());
4149 ArgValue =
4150 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
4151 }
4152
4153 // If value is passed via pointer - do a load.
4154 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
4155 ArgValue =
4156 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
4157
4158 InVals.push_back(ArgValue);
4159 }
4160
4161 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
4162 if (Ins[I].Flags.isSwiftAsync()) {
4163 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
4164 if (Subtarget.is64Bit())
4165 X86FI->setHasSwiftAsyncContext(true);
4166 else {
4167 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
4168 X86FI->setSwiftAsyncContextFrameIdx(FI);
4169 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
4170 DAG.getFrameIndex(FI, MVT::i32),
4171 MachinePointerInfo::getFixedStack(MF, FI));
4172 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
4173 }
4174 }
4175
4176 // Swift calling convention does not require we copy the sret argument
4177 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
4178 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
4179 continue;
4180
4181 // All x86 ABIs require that for returning structs by value we copy the
4182 // sret argument into %rax/%eax (depending on ABI) for the return. Save
4183 // the argument into a virtual register so that we can access it from the
4184 // return points.
4185 if (Ins[I].Flags.isSRet()) {
4186 assert(!FuncInfo->getSRetReturnReg() &&
4187 "SRet return has already been set");
4188 MVT PtrTy = getPointerTy(DAG.getDataLayout());
4189 Register Reg =
4190 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
4191 FuncInfo->setSRetReturnReg(Reg);
4192 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
4193 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
4194 break;
4195 }
4196 }
4197
4198 unsigned StackSize = CCInfo.getNextStackOffset();
4199 // Align stack specially for tail calls.
4200 if (shouldGuaranteeTCO(CallConv,
4201 MF.getTarget().Options.GuaranteedTailCallOpt))
4202 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
4203
4204 if (IsVarArg)
4205 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
4206 .lowerVarArgsParameters(Chain, StackSize);
4207
4208 // Some CCs need callee pop.
4209 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
4210 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4211 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
4212 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
4213 // X86 interrupts must pop the error code (and the alignment padding) if
4214 // present.
4215 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
4216 } else {
4217 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
4218 // If this is an sret function, the return should pop the hidden pointer.
4219 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
4220 FuncInfo->setBytesToPopOnReturn(4);
4221 }
4222
4223 if (!Is64Bit) {
4224 // RegSaveFrameIndex is X86-64 only.
4225 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4226 }
4227
4228 FuncInfo->setArgumentStackSize(StackSize);
4229
4230 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
4231 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
4232 if (Personality == EHPersonality::CoreCLR) {
4233 assert(Is64Bit);
4234 // TODO: Add a mechanism to frame lowering that will allow us to indicate
4235 // that we'd prefer this slot be allocated towards the bottom of the frame
4236 // (i.e. near the stack pointer after allocating the frame). Every
4237 // funclet needs a copy of this slot in its (mostly empty) frame, and the
4238 // offset from the bottom of this and each funclet's frame must be the
4239 // same, so the size of funclets' (mostly empty) frames is dictated by
4240 // how far this slot is from the bottom (since they allocate just enough
4241 // space to accommodate holding this slot at the correct offset).
4242 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
4243 EHInfo->PSPSymFrameIdx = PSPSymFI;
4244 }
4245 }
4246
4247 if (CallConv == CallingConv::X86_RegCall ||
4248 F.hasFnAttribute("no_caller_saved_registers")) {
4249 MachineRegisterInfo &MRI = MF.getRegInfo();
4250 for (std::pair<Register, Register> Pair : MRI.liveins())
4251 MRI.disableCalleeSavedRegister(Pair.first);
4252 }
4253
4254 return Chain;
4255}
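To summarize the bytes-to-pop-on-return logic near the end of LowerFormalArguments, here is a hedged standalone sketch; the parameters only approximate X86::isCalleePop, the X86_INTR error-code case, and hasCalleePopSRet, and are not the real interfaces:

// Illustrative sketch of the callee-cleanup byte count, not LLVM code.
#include <cstdio>

static unsigned bytesToPopOnReturn(bool CalleePopCC, bool IsX86Interrupt,
                                   bool HasErrorCode, bool Is64Bit,
                                   bool CalleePopsSRet, unsigned StackSize) {
  if (CalleePopCC)
    return StackSize;                 // e.g. stdcall: callee pops everything
  if (IsX86Interrupt && HasErrorCode)
    return Is64Bit ? 16 : 4;          // pop the error code (plus padding on x86-64)
  if (CalleePopsSRet)
    return 4;                         // pop only the hidden sret pointer
  return 0;                           // caller cleans up
}

int main() {
  std::printf("%u\n", bytesToPopOnReturn(true, false, false, false, false, 20));  // 20
  std::printf("%u\n", bytesToPopOnReturn(false, false, false, false, true, 20));  // 4
}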
4256
4257SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
4258 SDValue Arg, const SDLoc &dl,
4259 SelectionDAG &DAG,
4260 const CCValAssign &VA,
4261 ISD::ArgFlagsTy Flags,
4262 bool isByVal) const {
4263 unsigned LocMemOffset = VA.getLocMemOffset();
4264 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4265 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4266 StackPtr, PtrOff);
4267 if (isByVal)
4268 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
4269
4270 MaybeAlign Alignment;
4271 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
4272 Arg.getSimpleValueType() != MVT::f80)
4273 Alignment = MaybeAlign(4);
4274 return DAG.getStore(
4275 Chain, dl, Arg, PtrOff,
4276 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
4277 Alignment);
4278}
4279
4280/// Emit a load of return address if tail call
4281/// optimization is performed and it is required.
4282SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
4283 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
4284 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
4285 // Adjust the Return address stack slot.
4286 EVT VT = getPointerTy(DAG.getDataLayout());
4287 OutRetAddr = getReturnAddressFrameIndex(DAG);
4288
4289 // Load the "old" Return address.
4290 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
4291 return SDValue(OutRetAddr.getNode(), 1);
4292}
4293
4294/// Emit a store of the return address if tail call
4295/// optimization is performed and it is required (FPDiff!=0).
4296static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
4297 SDValue Chain, SDValue RetAddrFrIdx,
4298 EVT PtrVT, unsigned SlotSize,
4299 int FPDiff, const SDLoc &dl) {
4300 // Store the return address to the appropriate stack slot.
4301 if (!FPDiff) return Chain;
4302 // Calculate the new stack slot for the return address.
4303 int NewReturnAddrFI =
4304 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
4305 false);
4306 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
4307 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
4308 MachinePointerInfo::getFixedStack(
4309 DAG.getMachineFunction(), NewReturnAddrFI));
4310 return Chain;
4311}
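As an aside, a self-contained sketch of the FPDiff arithmetic that drives EmitTailCallStoreRetAddr (FPDiff is computed later in LowerCall as NumBytesCallerPushed - NumBytes); the names here are illustrative only:

// Sketch of the return-address relocation offset for guaranteed tail calls,
// not LLVM code.
#include <cstdio>

// Offset of the fixed stack object that the return address is stored to before
// the tail jump; SlotSize is the pointer size in bytes. Matches the
// CreateFixedObject(SlotSize, FPDiff - SlotSize, false) call above.
static int newReturnAddrOffset(int FPDiff, int SlotSize) {
  return FPDiff - SlotSize;
}

int main() {
  // Caller was entered with 16 bytes of stack arguments, tail callee needs 32:
  int FPDiff = 16 - 32;                                // NumBytesCallerPushed - NumBytes
  std::printf("%d\n", newReturnAddrOffset(FPDiff, 8)); // prints -24
}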
4312
4313 /// Returns a vector_shuffle mask for a movs{s|d}, movd
4314/// operation of specified width.
4315static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
4316 SDValue V2) {
4317 unsigned NumElems = VT.getVectorNumElements();
4318 SmallVector<int, 8> Mask;
4319 Mask.push_back(NumElems);
4320 for (unsigned i = 1; i != NumElems; ++i)
4321 Mask.push_back(i);
4322 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
4323}
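For clarity, a standalone sketch of the shuffle mask getMOVL builds (in LLVM's shuffle-mask convention, indices >= NumElems select elements of the second input); this is a plain illustration, not DAG code:

// Sketch of the MOVL-style shuffle mask: lane 0 comes from V2, the rest from V1.
#include <cstdio>
#include <vector>

static std::vector<int> movlMask(unsigned NumElems) {
  std::vector<int> Mask;
  Mask.push_back(NumElems);          // lane 0 <- V2[0]
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back(i);               // remaining lanes <- V1[i]
  return Mask;
}

int main() {
  for (int M : movlMask(4))
    std::printf("%d ", M);           // prints: 4 1 2 3
  std::printf("\n");
}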
4324
4325SDValue
4326X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4327 SmallVectorImpl<SDValue> &InVals) const {
4328 SelectionDAG &DAG = CLI.DAG;
4329 SDLoc &dl = CLI.DL;
4330 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4331 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4332 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4333 SDValue Chain = CLI.Chain;
4334 SDValue Callee = CLI.Callee;
4335 CallingConv::ID CallConv = CLI.CallConv;
4336 bool &isTailCall = CLI.IsTailCall;
4337 bool isVarArg = CLI.IsVarArg;
4338 const auto *CB = CLI.CB;
4339
4340 MachineFunction &MF = DAG.getMachineFunction();
4341 bool Is64Bit = Subtarget.is64Bit();
4342 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4343 bool IsSibcall = false;
4344 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
4345 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
4346 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
4347 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
4348 bool HasNCSR = (CB && isa<CallInst>(CB) &&
4349 CB->hasFnAttr("no_caller_saved_registers"));
4350 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
4351 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
4352 bool IsCFICall = IsIndirectCall && CLI.CFIType;
4353 const Module *M = MF.getMMI().getModule();
4354 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
4355
4356 MachineFunction::CallSiteInfo CSInfo;
4357 if (CallConv == CallingConv::X86_INTR)
4358 report_fatal_error("X86 interrupts may not be called directly");
4359
4360 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
4361 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
4362 // If we are using a GOT, disable tail calls to external symbols with
4363 // default visibility. Tail calling such a symbol requires using a GOT
4364 // relocation, which forces early binding of the symbol. This breaks code
4365 // that requires lazy function symbol resolution. Using musttail or
4366 // GuaranteedTailCallOpt will override this.
4367 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4368 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
4369 G->getGlobal()->hasDefaultVisibility()))
4370 isTailCall = false;
4371 }
4372
4373 if (isTailCall && !IsMustTail) {
4374 // Check if it's really possible to do a tail call.
4375 isTailCall = IsEligibleForTailCallOptimization(
4376 Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
4377 Ins, DAG);
4378
4379 // Sibcalls are automatically detected tailcalls which do not require
4380 // ABI changes.
4381 if (!IsGuaranteeTCO && isTailCall)
4382 IsSibcall = true;
4383
4384 if (isTailCall)
4385 ++NumTailCalls;
4386 }
4387
4388 if (IsMustTail && !isTailCall)
4389 report_fatal_error("failed to perform tail call elimination on a call "
4390 "site marked musttail");
4391
4392 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
4393 "Var args not supported with calling convention fastcc, ghc or hipe");
4394
4395 // Analyze operands of the call, assigning locations to each operand.
4396 SmallVector<CCValAssign, 16> ArgLocs;
4397 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4398
4399 // Allocate shadow area for Win64.
4400 if (IsWin64)
4401 CCInfo.AllocateStack(32, Align(8));
4402
4403 CCInfo.AnalyzeArguments(Outs, CC_X86);
4404
4405 // In vectorcall calling convention a second pass is required for the HVA
4406 // types.
4407 if (CallingConv::X86_VectorCall == CallConv) {
4408 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
4409 }
4410
4411 // Get a count of how many bytes are to be pushed on the stack.
4412 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
4413 if (IsSibcall)
4414 // This is a sibcall. The memory operands are available in caller's
4415 // own caller's stack.
4416 NumBytes = 0;
4417 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
4418 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
4419
4420 int FPDiff = 0;
4421 if (isTailCall &&
4422 shouldGuaranteeTCO(CallConv,
4423 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4424 // Lower arguments at fp - stackoffset + fpdiff.
4425 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
4426
4427 FPDiff = NumBytesCallerPushed - NumBytes;
4428
4429 // Set the delta of movement of the returnaddr stackslot.
4430 // But only set if delta is greater than previous delta.
4431 if (FPDiff < X86Info->getTCReturnAddrDelta())
4432 X86Info->setTCReturnAddrDelta(FPDiff);
4433 }
4434
4435 unsigned NumBytesToPush = NumBytes;
4436 unsigned NumBytesToPop = NumBytes;
4437
4438 // If we have an inalloca argument, all stack space has already been allocated
4439 // for us and will be right at the top of the stack. We don't support multiple
4440 // arguments passed in memory when using inalloca.
4441 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4442 NumBytesToPush = 0;
4443 if (!ArgLocs.back().isMemLoc())
4444 report_fatal_error("cannot use inalloca attribute on a register "
4445 "parameter");
4446 if (ArgLocs.back().getLocMemOffset() != 0)
4447 report_fatal_error("any parameter with the inalloca attribute must be "
4448 "the only memory argument");
4449 } else if (CLI.IsPreallocated) {
4450 assert(ArgLocs.back().isMemLoc() &&
4451 "cannot use preallocated attribute on a register "
4452 "parameter");
4453 SmallVector<size_t, 4> PreallocatedOffsets;
4454 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4455 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4456 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4457 }
4458 }
4459 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4460 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4461 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4462 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4463 NumBytesToPush = 0;
4464 }
4465
4466 if (!IsSibcall && !IsMustTail)
4467 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4468 NumBytes - NumBytesToPush, dl);
4469
4470 SDValue RetAddrFrIdx;
4471 // Load return address for tail calls.
4472 if (isTailCall && FPDiff)
4473 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4474 Is64Bit, FPDiff, dl);
4475
4476 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4477 SmallVector<SDValue, 8> MemOpChains;
4478 SDValue StackPtr;
4479
4480 // The next loop assumes that the locations are in the same order as the
4481 // input arguments.
4482 assert(isSortedByValueNo(ArgLocs) &&
4483 "Argument Location list must be sorted before lowering");
4484
4485 // Walk the register/memloc assignments, inserting copies/loads. In the case
4486 // of tail call optimization arguments are handled later.
4487 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4488 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4489 ++I, ++OutIndex) {
4490 assert(OutIndex < Outs.size() && "Invalid Out index");
4491 // Skip inalloca/preallocated arguments, they have already been written.
4492 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4493 if (Flags.isInAlloca() || Flags.isPreallocated())
4494 continue;
4495
4496 CCValAssign &VA = ArgLocs[I];
4497 EVT RegVT = VA.getLocVT();
4498 SDValue Arg = OutVals[OutIndex];
4499 bool isByVal = Flags.isByVal();
4500
4501 // Promote the value if needed.
4502 switch (VA.getLocInfo()) {
4503 default: llvm_unreachable("Unknown loc info!");
4504 case CCValAssign::Full: break;
4505 case CCValAssign::SExt:
4506 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4507 break;
4508 case CCValAssign::ZExt:
4509 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4510 break;
4511 case CCValAssign::AExt:
4512 if (Arg.getValueType().isVector() &&
4513 Arg.getValueType().getVectorElementType() == MVT::i1)
4514 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4515 else if (RegVT.is128BitVector()) {
4516 // Special case: passing MMX values in XMM registers.
4517 Arg = DAG.getBitcast(MVT::i64, Arg);
4518 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4519 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4520 } else
4521 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4522 break;
4523 case CCValAssign::BCvt:
4524 Arg = DAG.getBitcast(RegVT, Arg);
4525 break;
4526 case CCValAssign::Indirect: {
4527 if (isByVal) {
4528 // Memcpy the argument to a temporary stack slot to prevent
4529 // the caller from seeing any modifications the callee may make
4530 // as guaranteed by the `byval` attribute.
4531 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4532 Flags.getByValSize(),
4533 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4534 SDValue StackSlot =
4535 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4536 Chain =
4537 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4538 // From now on treat this as a regular pointer
4539 Arg = StackSlot;
4540 isByVal = false;
4541 } else {
4542 // Store the argument.
4543 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4544 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4545 Chain = DAG.getStore(
4546 Chain, dl, Arg, SpillSlot,
4547 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4548 Arg = SpillSlot;
4549 }
4550 break;
4551 }
4552 }
4553
4554 if (VA.needsCustom()) {
4555 assert(VA.getValVT() == MVT::v64i1 &&
4556 "Currently the only custom case is when we split v64i1 to 2 regs");
4557 // Split v64i1 value into two registers
4558 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4559 } else if (VA.isRegLoc()) {
4560 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4561 const TargetOptions &Options = DAG.getTarget().Options;
4562 if (Options.EmitCallSiteInfo)
4563 CSInfo.emplace_back(VA.getLocReg(), I);
4564 if (isVarArg && IsWin64) {
4565 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4566 // shadow reg if callee is a varargs function.
4567 Register ShadowReg;
4568 switch (VA.getLocReg()) {
4569 case X86::XMM0: ShadowReg = X86::RCX; break;
4570 case X86::XMM1: ShadowReg = X86::RDX; break;
4571 case X86::XMM2: ShadowReg = X86::R8; break;
4572 case X86::XMM3: ShadowReg = X86::R9; break;
4573 }
4574 if (ShadowReg)
4575 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4576 }
4577 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4578 assert(VA.isMemLoc());
4579 if (!StackPtr.getNode())
4580 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4581 getPointerTy(DAG.getDataLayout()));
4582 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4583 dl, DAG, VA, Flags, isByVal));
4584 }
4585 }
4586
4587 if (!MemOpChains.empty())
4588 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4589
4590 if (Subtarget.isPICStyleGOT()) {
4591 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4592 // GOT pointer (except regcall).
4593 if (!isTailCall) {
4594 // Indirect call with RegCall calling convention may use up all the
4595 // general registers, so it is not suitable to bind EBX register for
4596 // GOT address, just let register allocator handle it.
4597 if (CallConv != CallingConv::X86_RegCall)
4598 RegsToPass.push_back(std::make_pair(
4599 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4600 getPointerTy(DAG.getDataLayout()))));
4601 } else {
4602 // If we are tail calling and generating PIC/GOT style code load the
4603 // address of the callee into ECX. The value in ecx is used as target of
4604 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4605 // for tail calls on PIC/GOT architectures. Normally we would just put the
4606 // address of GOT into ebx and then call target@PLT. But for tail calls
4607 // ebx would be restored (since ebx is callee saved) before jumping to the
4608 // target@PLT.
4609
4610 // Note: The actual moving to ECX is done further down.
4611 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4612 if (G && !G->getGlobal()->hasLocalLinkage() &&
4613 G->getGlobal()->hasDefaultVisibility())
4614 Callee = LowerGlobalAddress(Callee, DAG);
4615 else if (isa<ExternalSymbolSDNode>(Callee))
4616 Callee = LowerExternalSymbol(Callee, DAG);
4617 }
4618 }
4619
4620 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
4621 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
4622 // From AMD64 ABI document:
4623 // For calls that may call functions that use varargs or stdargs
4624 // (prototype-less calls or calls to functions containing ellipsis (...) in
4625 // the declaration) %al is used as hidden argument to specify the number
4626 // of SSE registers used. The contents of %al do not need to match exactly
4627 // the number of registers, but must be an upper bound on the number of SSE
4628 // registers used and is in the range 0 - 8 inclusive.
4629
4630 // Count the number of XMM registers allocated.
4631 static const MCPhysReg XMMArgRegs[] = {
4632 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4633 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4634 };
4635 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4636 assert((Subtarget.hasSSE1() || !NumXMMRegs)
4637 && "SSE registers cannot be used when SSE is disabled");
4638 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4639 DAG.getConstant(NumXMMRegs, dl,
4640 MVT::i8)));
4641 }
4642
4643 if (isVarArg && IsMustTail) {
4644 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4645 for (const auto &F : Forwards) {
4646 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4647 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4648 }
4649 }
4650
4651 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4652 // don't need this because the eligibility check rejects calls that require
4653 // shuffling arguments passed in memory.
4654 if (!IsSibcall && isTailCall) {
4655 // Force all the incoming stack arguments to be loaded from the stack
4656 // before any new outgoing arguments are stored to the stack, because the
4657 // outgoing stack slots may alias the incoming argument stack slots, and
4658 // the alias isn't otherwise explicit. This is slightly more conservative
4659 // than necessary, because it means that each store effectively depends
4660 // on every argument instead of just those arguments it would clobber.
4661 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4662
4663 SmallVector<SDValue, 8> MemOpChains2;
4664 SDValue FIN;
4665 int FI = 0;
4666 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4667 ++I, ++OutsIndex) {
4668 CCValAssign &VA = ArgLocs[I];
4669
4670 if (VA.isRegLoc()) {
4671 if (VA.needsCustom()) {
4672 assert((CallConv == CallingConv::X86_RegCall) &&
4673 "Expecting custom case only in regcall calling convention");
4674 // This means that we are in special case where one argument was
4675 // passed through two register locations - Skip the next location
4676 ++I;
4677 }
4678
4679 continue;
4680 }
4681
4682 assert(VA.isMemLoc());
4683 SDValue Arg = OutVals[OutsIndex];
4684 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4685 // Skip inalloca/preallocated arguments. They don't require any work.
4686 if (Flags.isInAlloca() || Flags.isPreallocated())
4687 continue;
4688 // Create frame index.
4689 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4690 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4691 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4692 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4693
4694 if (Flags.isByVal()) {
4695 // Copy relative to framepointer.
4696 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4697 if (!StackPtr.getNode())
4698 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4699 getPointerTy(DAG.getDataLayout()));
4700 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4701 StackPtr, Source);
4702
4703 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4704 ArgChain,
4705 Flags, DAG, dl));
4706 } else {
4707 // Store relative to framepointer.
4708 MemOpChains2.push_back(DAG.getStore(
4709 ArgChain, dl, Arg, FIN,
4710 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4711 }
4712 }
4713
4714 if (!MemOpChains2.empty())
4715 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4716
4717 // Store the return address to the appropriate stack slot.
4718 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4719 getPointerTy(DAG.getDataLayout()),
4720 RegInfo->getSlotSize(), FPDiff, dl);
4721 }
4722
4723 // Build a sequence of copy-to-reg nodes chained together with token chain
4724 // and flag operands which copy the outgoing args into registers.
4725 SDValue InFlag;
4726 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4727 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4728 RegsToPass[i].second, InFlag);
4729 InFlag = Chain.getValue(1);
4730 }
4731
4732 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4733 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
4734 // In the 64-bit large code model, we have to make all calls
4735 // through a register, since the call instruction's 32-bit
4736 // pc-relative offset may not be large enough to hold the whole
4737 // address.
4738 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4739 Callee->getOpcode() == ISD::ExternalSymbol) {
4740 // Lower direct calls to global addresses and external symbols. Setting
4741 // ForCall to true here has the effect of removing WrapperRIP when possible
4742 // to allow direct calls to be selected without first materializing the
4743 // address into a register.
4744 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4745 } else if (Subtarget.isTarget64BitILP32() &&
4746 Callee.getValueType() == MVT::i32) {
4747 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4748 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4749 }
4750
4751 // Returns a chain & a flag for retval copy to use.
4752 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4753 SmallVector<SDValue, 8> Ops;
4754
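  // For a guaranteed tail call that is neither a sibcall nor a musttail call,
  // the call sequence is closed here, before the call itself, since control
  // never returns to this frame.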
4755 if (!IsSibcall && isTailCall && !IsMustTail) {
4756 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InFlag, dl);
4757 InFlag = Chain.getValue(1);
4758 }
4759
4760 Ops.push_back(Chain);
4761 Ops.push_back(Callee);
4762
4763 if (isTailCall)
4764 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4765
4766 // Add argument registers to the end of the list so that they are known live
4767 // into the call.
4768 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4769 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4770 RegsToPass[i].second.getValueType()));
4771
4772 // Add a register mask operand representing the call-preserved registers.
4773 const uint32_t *Mask = [&]() {
4774 auto AdaptedCC = CallConv;
4775 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
4776 // use X86_INTR calling convention because it has the same CSR mask
4777 // (same preserved registers).
4778 if (HasNCSR)
4779 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
4780 // If NoCalleeSavedRegisters is requested, then use GHC, since it happens
4781 // to use the CSR_NoRegs_RegMask.
4782 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
4783 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
4784 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
4785 }();
4786 assert(Mask && "Missing call preserved mask for calling convention");
4787
4788 // If this is an invoke in a 32-bit function using a funclet-based
4789 // personality, assume the function clobbers all registers. If an exception
4790 // is thrown, the runtime will not restore CSRs.
4791 // FIXME: Model this more precisely so that we can register allocate across
4792 // the normal edge and spill and fill across the exceptional edge.
4793 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4794 const Function &CallerFn = MF.getFunction();
4795 EHPersonality Pers =
4796 CallerFn.hasPersonalityFn()
4797 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4798 : EHPersonality::Unknown;
4799 if (isFuncletEHPersonality(Pers))
4800 Mask = RegInfo->getNoPreservedMask();
4801 }
4802
4803 // Define a new register mask from the existing mask.
4804 uint32_t *RegMask = nullptr;
4805
4806 // In some calling conventions we need to remove the used physical registers
4807 // from the reg mask.
4808 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4809 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4810
4811 // Allocate a new Reg Mask and copy Mask.
4812 RegMask = MF.allocateRegMask();
4813 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4814 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4815
4816 // Make sure all sub registers of the argument registers are reset
4817 // in the RegMask.
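     // A register mask holds one bit per physical register, packed 32 to a
     // uint32_t; a cleared bit marks the register as clobbered by the call.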
4818 for (auto const &RegPair : RegsToPass)
4819 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4820 SubRegs.isValid(); ++SubRegs)
4821 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4822
4823 // Create the RegMask Operand according to our updated mask.
4824 Ops.push_back(DAG.getRegisterMask(RegMask));
4825 } else {
4826 // Create the RegMask Operand according to the static mask.
4827 Ops.push_back(DAG.getRegisterMask(Mask));
4828 }
4829
4830 if (InFlag.getNode())
4831 Ops.push_back(InFlag);
4832
4833 if (isTailCall) {
4834 // We used to do:
4835 //// If this is the first return lowered for this function, add the regs
4836 //// to the liveout set for the function.
4837 // This isn't right, although it's probably harmless on x86; liveouts
4838 // should be computed from returns not tail calls. Consider a void
4839 // function making a tail call to a function returning int.
4840 MF.getFrameInfo().setHasTailCall();
4841 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4842
4843 if (IsCFICall)
4844 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
4845
4846 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4847 return Ret;
4848 }
4849
4850 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4851 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4852 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4853 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
4854 // expanded to the call, directly followed by a special marker sequence and
4855 // a call to an ObjC library function. Use the CALL_RVMARKER node to do that.
4856 assert(!isTailCall &&
4857        "tail calls cannot be marked with clang.arc.attachedcall");
4858 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
4859
4860 // Add a target global address for the retainRV/claimRV runtime function
4861 // just before the call target.
4862 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
4863 auto PtrVT = getPointerTy(DAG.getDataLayout());
4864 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
4865 Ops.insert(Ops.begin() + 1, GA);
4866 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
4867 } else {
4868 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4869 }
4870
4871 if (IsCFICall)
4872 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
4873
4874 InFlag = Chain.getValue(1);
4875 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4876 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4877
4878 // Save heapallocsite metadata.
4879 if (CLI.CB)
4880 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4881 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4882
4883 // Create the CALLSEQ_END node.
4884 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
4885 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4886 DAG.getTarget().Options.GuaranteedTailCallOpt))
4887 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4888 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
4889 // If this call passes a struct-return pointer, the callee
4890 // pops that struct pointer.
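     // (4 bytes: the size of the sret pointer on the 32-bit targets where the
     // callee-pop sret convention applies.)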
4891 NumBytesForCalleeToPop = 4;
4892
4893 // Returns a flag for retval copy to use.
4894 if (!IsSibcall) {
4895 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
4896 InFlag, dl);
4897 InFlag = Chain.getValue(1);
4898 }
4899
4900 // Handle result values, copying them out of physregs into vregs that we
4901 // return.
4902 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4903 InVals, RegMask);
4904}
4905
4906//===----------------------------------------------------------------------===//
4907// Fast Calling Convention (tail call) implementation
4908//===----------------------------------------------------------------------===//
4909
4910 // Like the stdcall convention, the callee cleans up the arguments, except that
4911 // ECX is reserved for storing the tail-called function's address. Only 2
4912 // registers are free for argument passing (inreg). Tail call optimization is
4913 // performed provided:
4914 // * tailcallopt is enabled
4915 // * caller/callee are fastcc
4916 // On the X86_64 architecture with GOT-style position-independent code, only
4917 // local (within-module) calls are supported at the moment.
4918 // To keep the stack aligned according to the platform ABI, the function
4919 // GetAlignedArgumentStackSize ensures that the argument delta is always a
4920 // multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld,
4921 // for example.) If a tail-called callee has more arguments than the caller, the
4922 // caller needs to make sure that there is room to move the RETADDR to. This is
4923 // achieved by reserving an area the size of the argument delta right after the
4924 // original RETADDR, but before the saved frame pointer or the spilled registers,
4925 // e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4).
4926// stack layout:
4927// arg1
4928// arg2
4929// RETADDR
4930// [ new RETADDR
4931// move area ]
4932// (possible EBP)
4933// ESI
4934// EDI
4935// local1 ..
4936
4937 /// Align the stack size so that, together with the return-address slot, it
4938 /// meets the alignment requirement, e.g. 16n + 12 for 16-byte alignment.
4939unsigned
4940X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4941 SelectionDAG &DAG) const {
4942 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4943 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4944 assert(StackSize % SlotSize == 0 &&
4945        "StackSize must be a multiple of SlotSize");
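  // Reserve room for the pushed return address (SlotSize), align the total,
  // then subtract the slot back out again.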
4946 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4947}
4948
4949/// Return true if the given stack call argument is already available in the
4950/// same position (relatively) of the caller's incoming argument stack.
4951static
4952bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4953 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4954 const X86InstrInfo *TII, const CCValAssign &VA) {
4955 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4956
4957 for (;;) {
4958 // Look through nodes that don't alter the bits of the incoming value.
4959 unsigned Op = Arg.getOpcode();
4960 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4961 Arg = Arg.getOperand(0);
4962 continue;
4963 }
4964 if (Op == ISD::TRUNCATE) {
4965 const SDValue &TruncInput = Arg.getOperand(0);
4966 if (TruncInput.getOpcode() == ISD::AssertZext &&
4967 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4968 Arg.getValueType()) {
4969 Arg = TruncInput.getOperand(0);
4970 continue;
4971 }
4972 }
4973 break;
4974 }
4975
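  // Identify the fixed stack object backing Arg: either the slot it was
  // (re)loaded from, or, for a byval argument, the frame object whose address
  // it carries.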
4976 int FI = INT_MAX;
4977 if (Arg.getOpcode() == ISD::CopyFromReg) {
4978 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4979 if (!VR.isVirtual())
4980 return false;
4981 MachineInstr *Def = MRI->getVRegDef(VR);
4982 if (!Def)
4983 return false;
4984 if (!Flags.isByVal()) {
4985 if (!TII->isLoadFromStackSlot(*Def, FI))
4986 return false;
4987 } else {
4988 unsigned Opcode = Def->getOpcode();
4989 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4990 Opcode == X86::LEA64_32r) &&
4991 Def->getOperand(1).isFI()) {
4992 FI = Def->getOperand(1).getIndex();
4993 Bytes = Flags.getByValSize();
4994 } else
4995 return false;
4996 }
4997 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4998 if (Flags.isByVal())
4999 // ByVal argument is passed in as a pointer but it's now being
5000 // dereferenced. e.g.
5001 // define @foo(%struct.X* %A) {
5002 // tail call @bar(%struct.X* byval %A)
5003 // }
5004 return false;
5005 SDValue Ptr = Ld->getBasePtr();
5006 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
5007 if (!FINode)
5008 return false;
5009 FI = FINode->getIndex();
5010 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
5011 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
5012 FI = FINode->getIndex();
5013 Bytes = Flags.getByValSize();
5014 } else
5015 return false;
5016
5017 assert(FI != INT_MAX);
5018 if (!MFI.isFixedObjectIndex(FI))
5019 return false;
5020
5021 if (Offset != MFI.getObjectOffset(FI))
5022 return false;
5023
5024 // If this is not byval, check that the argument stack object is immutable.
5025 // inalloca and argument copy elision can create mutable argument stack
5026 // objects. Byval objects can be mutated, but a byval call intends to pass the
5027 // mutated memory.
5028 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
5029 return false;
5030
5031 if (VA.getLocVT().getFixedSizeInBits() >
5032 Arg.getValueSizeInBits().getFixedSize()) {
5033 // If the argument location is wider than the argument type, check that any
5034 // extension flags match.
5035 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
5036 Flags.isSExt() != MFI.isObjectSExt(FI)) {
5037 return false;
5038 }
5039 }
5040
5041 return Bytes == MFI.getObjectSize(FI);
5042}
5043
5044/// Check whether the call is eligible for tail call optimization. Targets
5045/// that want to do tail call optimization should implement this function.
5046bool X86TargetLowering::IsEligibleForTailCallOptimization(
5047 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
5048 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
5049 const SmallVectorImpl<SDValue> &OutVals,
5050 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5051 if (!mayTailCallThisCC(CalleeCC))
5052 return false;
5053
5054 // If -tailcallopt is specified, make fastcc functions tail-callable.
5055 MachineFunction &MF = DAG.getMachineFunction();
5056 const Function &CallerF = MF.getFunction();
5057
5058 // If the function return type is x86_fp80 and the callee return type is not,
5059 // then the FP_EXTEND of the call result is not a nop. It's not safe to
5060 // perform a tailcall optimization here.
5061 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
5062 return false;
5063
5064 CallingConv::ID CallerCC = CallerF.getCallingConv();
5065 bool CCMatch = CallerCC == CalleeCC;
5066 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
5067 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
5068 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
5069 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
5070
5071 // Win64 functions have extra shadow space for argument homing. Don't do the
5072 // sibcall if the caller and callee have mismatched expectations for this
5073 // space.
5074 if (IsCalleeWin64 != IsCallerWin64)
5075 return false;
5076
5077 if (IsGuaranteeTCO) {
5078 if (canGuaranteeTCO(CalleeCC) && CCMatch)
5079 return true;
5080 return false;
5081 }
5082
5083 // Look for obvious safe cases to perform tail call optimization that do not
5084 // require ABI changes. This is what gcc calls sibcall.
5085
5086 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
5087 // emit a special epilogue.
5088 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5089 if (RegInfo->hasStackRealignment(MF))
5090 return false;
5091
5092 // Also avoid sibcall optimization if we're an sret return fn and the callee
5093 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
5094 // insufficient.
5095 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
5096 // For a compatible tail call the callee must return our sret pointer. So it
5097 // needs to be (a) an sret function itself and (b) we pass our sret as its
5098 // sret. Condition #b is harder to determine.
5099 return false;
5100 } else if (IsCalleePopSRet)
5101 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
5102 // expect that.
5103 return false;
5104
5105 // Do not sibcall optimize vararg calls unless all arguments are passed via
5106 // registers.
5107 LLVMContext &C = *DAG.getContext();
5108 if (isVarArg && !Outs.empty()) {
5109 // Optimizing for varargs on Win64 is unlikely to be safe without
5110 // additional testing.
5111 if (IsCalleeWin64 || IsCallerWin64)
5112 return false;
5113
5114 SmallVector<CCValAssign, 16> ArgLocs;
5115 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5116
5117 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
5118 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
5119 if (!ArgLocs[i].isRegLoc())
5120 return false;
5121 }
5122
5123 // If the call result is in ST0 / ST1, it needs to be popped off the x87
5124 // stack. Therefore, if it's not used by the call it is not safe to optimize
5125 // this into a sibcall.
5126 bool Unused = false;
5127 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5128 if (!Ins[i].Used) {
5129 Unused = true;
5130 break;
5131 }
5132 }
5133 if (Unused) {
5134 SmallVector<CCValAssign, 16> RVLocs;
5135 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
5136 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
5137 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5138 CCValAssign &VA = RVLocs[i];
5139 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
5140 return false;
5141 }
5142 }
5143
5144 // Check that the call results are passed in the same way.
5145 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5146 RetCC_X86, RetCC_X86))
5147 return false;
5148 // The callee has to preserve all registers the caller needs to preserve.
5149 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
5150 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5151 if (!CCMatch) {
5152 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5153 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5154 return false;
5155 }
5156
5157 unsigned StackArgsSize = 0;
5158
5159 // If the callee takes no arguments then go on to check the results of the
5160 // call.
5161 if (!Outs.empty()) {
5162 // Check if stack adjustment is needed. For now, do not do this if any
5163 // argument is passed on the stack.
5164 SmallVector<CCValAssign, 16> ArgLocs;
5165 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5166
5167 // Allocate shadow area for Win64
5168 if (IsCalleeWin64)
5169 CCInfo.AllocateStack(32, Align(8));
5170
5171 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
5172 StackArgsSize = CCInfo.getNextStackOffset();
5173
5174 if (CCInfo.getNextStackOffset()) {
5175 // Check if the arguments are already laid out in the right way as
5176 // the caller's fixed stack objects.
5177 MachineFrameInfo &MFI = MF.getFrameInfo();
5178 const MachineRegisterInfo *MRI = &MF.getRegInfo();
5179 const X86InstrInfo *TII = Subtarget.getInstrInfo();
5180 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5181 CCValAssign &VA = ArgLocs[i];
5182 SDValue Arg = OutVals[i];
5183 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5184 if (VA.getLocInfo() == CCValAssign::Indirect)
5185 return false;
5186 if (!VA.isRegLoc()) {
5187 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
5188 MFI, MRI, TII, VA))
5189 return false;
5190 }
5191 }
5192 }
5193
5194 bool PositionIndependent = isPositionIndependent();
5195 // If the tailcall address may be in a register, then make sure it's
5196 // possible to register allocate for it. In 32-bit, the call address can
5197 // only target EAX, EDX, or ECX since the tail call must be scheduled after
5198 // callee-saved registers are restored. These happen to be the same
5199 // registers used to pass 'inreg' arguments so watch out for those.
5200 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
5201 !isa<ExternalSymbolSDNode>(Callee)) ||
5202 PositionIndependent)) {
5203 unsigned NumInRegs = 0;
5204 // In PIC we need an extra register to formulate the address computation
5205 // for the callee.
5206 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
5207
5208 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5209 CCValAssign &VA = ArgLocs[i];
5210 if (!VA.isRegLoc())
5211 continue;
5212 Register Reg = VA.getLocReg();
5213 switch (Reg) {
5214 default: break;
5215 case X86::EAX: case X86::EDX: case X86::ECX:
5216 if (++NumInRegs == MaxInRegs)
5217 return false;
5218 break;
5219 }
5220 }
5221 }
5222
5223 const MachineRegisterInfo &MRI = MF.getRegInfo();
5224 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5225 return false;
5226 }
5227
5228 bool CalleeWillPop =
5229 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
5230 MF.getTarget().Options.GuaranteedTailCallOpt);
5231
5232 if (unsigned BytesToPop =
5233 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
5234 // If we have bytes to pop, the callee must pop them.
5235 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
5236 if (!CalleePopMatches)
5237 return false;
5238 } else if (CalleeWillPop && StackArgsSize > 0) {
5239 // If we don't have bytes to pop, make sure the callee doesn't pop any.
5240 return false;
5241 }
5242
5243 return true;
5244}
5245
5246FastISel *
5247X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
5248 const TargetLibraryInfo *libInfo) const {
5249 return X86::createFastISel(funcInfo, libInfo);
5250}
5251
5252//===----------------------------------------------------------------------===//
5253// Other Lowering Hooks
5254//===----------------------------------------------------------------------===//
5255
5256bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
5257 bool AssumeSingleUse) {
5258 if (!AssumeSingleUse && !Op.hasOneUse())
5259 return false;
5260 if (!ISD::isNormalLoad(Op.getNode()))
5261 return false;
5262
5263 // If this is an unaligned vector, make sure the target supports folding it.
5264 auto *Ld = cast<LoadSDNode>(Op.getNode());
5265 if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
5266 Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
5267 return false;
5268
5269 // TODO: If this is a non-temporal load and the target has an instruction
5270 // for it, it should not be folded. See "useNonTemporalLoad()".
5271
5272 return true;
5273}
5274
5275bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
5276 const X86Subtarget &Subtarget,
5277 bool AssumeSingleUse) {
5278 assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
5279 if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
5280 return false;
5281
5282 // We cannot replace a wide volatile load with a broadcast-from-memory,
5283 // because that would narrow the load, which isn't legal for volatiles.
5284 auto *Ld = cast<LoadSDNode>(Op.getNode());
5285 return !Ld->isVolatile() ||
5286 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
5287}
5288
5289bool X86::mayFoldIntoStore(SDValue Op) {
5290 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
5291}
5292
5293bool X86::mayFoldIntoZeroExtend(SDValue Op) {
5294 if (Op.hasOneUse()) {
5295 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
5296 return (ISD::ZERO_EXTEND == Opcode);
5297 }
5298 return false;
5299}
5300
5301static bool isTargetShuffle(unsigned Opcode) {
5302 switch(Opcode) {
5303 default: return false;
5304 case X86ISD::BLENDI:
5305 case X86ISD::PSHUFB:
5306 case X86ISD::PSHUFD:
5307 case X86ISD::PSHUFHW:
5308 case X86ISD::PSHUFLW:
5309 case X86ISD::SHUFP:
5310 case X86ISD::INSERTPS:
5311 case X86ISD::EXTRQI:
5312 case X86ISD::INSERTQI:
5313 case X86ISD::VALIGN:
5314 case X86ISD::PALIGNR:
5315 case X86ISD::VSHLDQ:
5316 case X86ISD::VSRLDQ:
5317 case X86ISD::MOVLHPS:
5318 case X86ISD::MOVHLPS:
5319 case X86ISD::MOVSHDUP:
5320 case X86ISD::MOVSLDUP:
5321 case X86ISD::MOVDDUP:
5322 case X86ISD::MOVSS:
5323 case X86ISD::MOVSD:
5324 case X86ISD::MOVSH:
5325 case X86ISD::UNPCKL:
5326 case X86ISD::UNPCKH:
5327 case X86ISD::VBROADCAST:
5328 case X86ISD::VPERMILPI:
5329 case X86ISD::VPERMILPV:
5330 case X86ISD::VPERM2X128:
5331 case X86ISD::SHUF128:
5332 case X86ISD::VPERMIL2:
5333 case X86ISD::VPERMI:
5334 case X86ISD::VPPERM:
5335 case X86ISD::VPERMV:
5336 case X86ISD::VPERMV3:
5337 case X86ISD::VZEXT_MOVL:
5338 return true;
5339 }
5340}
5341
5342static bool isTargetShuffleVariableMask(unsigned Opcode) {
5343 switch (Opcode) {
5344 default: return false;
5345 // Target Shuffles.
5346 case X86ISD::PSHUFB:
5347 case X86ISD::VPERMILPV:
5348 case X86ISD::VPERMIL2:
5349 case X86ISD::VPPERM:
5350 case X86ISD::VPERMV:
5351 case X86ISD::VPERMV3:
5352 return true;
5353 // 'Faux' Target Shuffles.
5354 case ISD::OR:
5355 case ISD::AND:
5356 case X86ISD::ANDNP:
5357 return true;
5358 }
5359}
5360
5361SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
5362 MachineFunction &MF = DAG.getMachineFunction();
5363 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5364 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
5365 int ReturnAddrIndex = FuncInfo->getRAIndex();
5366
5367 if (ReturnAddrIndex == 0) {
5368 // Set up a frame object for the return address.
5369 unsigned SlotSize = RegInfo->getSlotSize();
5370 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
5371 -(int64_t)SlotSize,
5372 false);
5373 FuncInfo->setRAIndex(ReturnAddrIndex);
5374 }
5375
5376 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
5377}
5378
5379bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
5380 bool hasSymbolicDisplacement) {
5381 // Offset should fit into 32 bit immediate field.
5382 if (!isInt<32>(Offset))
5383 return false;
5384
5385 // If we don't have a symbolic displacement - we don't have any extra
5386 // restrictions.
5387 if (!hasSymbolicDisplacement)
5388 return true;
5389
5390 // FIXME: Some tweaks might be needed for medium code model.
5391 if (M != CodeModel::Small && M != CodeModel::Kernel)
5392 return false;
5393
5394 // For the small code model we assume that the last object ends at least 16MB
5395 // before the 31-bit boundary. We may also accept pretty large negative offsets,
5396 // knowing that all objects are in the positive half of the address space.
5397 if (M == CodeModel::Small && Offset < 16*1024*1024)
5398 return true;
5399
5400 // For the kernel code model we know that all objects reside in the negative
5401 // half of the 32-bit address space. We must not accept negative offsets (they
5402 // may fall just outside an object), but we may accept pretty large positive ones.
5403 if (M == CodeModel::Kernel && Offset >= 0)
5404 return true;
5405
5406 return false;
5407}
5408
5409/// Determines whether the callee is required to pop its own arguments.
5410/// Callee pop is necessary to support tail calls.
5411bool X86::isCalleePop(CallingConv::ID CallingConv,
5412 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
5413 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
5414 // can guarantee TCO.
5415 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
5416 return true;
5417
5418 switch (CallingConv) {
5419 default:
5420 return false;
5421 case CallingConv::X86_StdCall:
5422 case CallingConv::X86_FastCall:
5423 case CallingConv::X86_ThisCall:
5424 case CallingConv::X86_VectorCall:
5425 return !is64Bit;
5426 }
5427}
5428
5429 /// Return true if the condition is a signed comparison operation.
5430static bool isX86CCSigned(unsigned X86CC) {
5431 switch (X86CC) {
5432 default:
5433 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5433)
;
5434 case X86::COND_E:
5435 case X86::COND_NE:
5436 case X86::COND_B:
5437 case X86::COND_A:
5438 case X86::COND_BE:
5439 case X86::COND_AE:
5440 return false;
5441 case X86::COND_G:
5442 case X86::COND_GE:
5443 case X86::COND_L:
5444 case X86::COND_LE:
5445 return true;
5446 }
5447}
5448
5449static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
5450 switch (SetCCOpcode) {
5451 default: llvm_unreachable("Invalid integer condition!");
5452 case ISD::SETEQ: return X86::COND_E;
5453 case ISD::SETGT: return X86::COND_G;
5454 case ISD::SETGE: return X86::COND_GE;
5455 case ISD::SETLT: return X86::COND_L;
5456 case ISD::SETLE: return X86::COND_LE;
5457 case ISD::SETNE: return X86::COND_NE;
5458 case ISD::SETULT: return X86::COND_B;
5459 case ISD::SETUGT: return X86::COND_A;
5460 case ISD::SETULE: return X86::COND_BE;
5461 case ISD::SETUGE: return X86::COND_AE;
5462 }
5463}
5464
5465/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
5466/// condition code, returning the condition code and the LHS/RHS of the
5467/// comparison to make.
5468static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
5469 bool isFP, SDValue &LHS, SDValue &RHS,
5470 SelectionDAG &DAG) {
5471 if (!isFP) {
5472 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5473 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
5474 // X > -1 -> X == 0, jump !sign.
5475 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5476 return X86::COND_NS;
5477 }
5478 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
5479 // X < 0 -> X == 0, jump on sign.
5480 return X86::COND_S;
5481 }
5482 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
5483 // X >= 0 -> X == 0, jump on !sign.
5484 return X86::COND_NS;
5485 }
5486 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5487 // X < 1 -> X <= 0
5488 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5489 return X86::COND_LE;
5490 }
5491 }
5492
5493 return TranslateIntegerX86CC(SetCCOpcode);
5494 }
5495
5496 // First determine if it is required or is profitable to flip the operands.
5497
5498 // If LHS is a foldable load, but RHS is not, flip the condition.
5499 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5500 !ISD::isNON_EXTLoad(RHS.getNode())) {
5501 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5502 std::swap(LHS, RHS);
5503 }
5504
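  // SETOLT/SETOLE/SETUGT/SETUGE have no direct encoding in the flags produced
  // by a UCOMIS*-style compare, but their operand-swapped forms do, so swap
  // the operands and use the flipped predicates in the table below.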
5505 switch (SetCCOpcode) {
5506 default: break;
5507 case ISD::SETOLT:
5508 case ISD::SETOLE:
5509 case ISD::SETUGT:
5510 case ISD::SETUGE:
5511 std::swap(LHS, RHS);
5512 break;
5513 }
5514
5515 // On a floating point condition, the flags are set as follows:
5516 // ZF PF CF op
5517 // 0 | 0 | 0 | X > Y
5518 // 0 | 0 | 1 | X < Y
5519 // 1 | 0 | 0 | X == Y
5520 // 1 | 1 | 1 | unordered
5521 switch (SetCCOpcode) {
5522 default: llvm_unreachable("Condcode should be pre-legalized away");
5523 case ISD::SETUEQ:
5524 case ISD::SETEQ: return X86::COND_E;
5525 case ISD::SETOLT: // flipped
5526 case ISD::SETOGT:
5527 case ISD::SETGT: return X86::COND_A;
5528 case ISD::SETOLE: // flipped
5529 case ISD::SETOGE:
5530 case ISD::SETGE: return X86::COND_AE;
5531 case ISD::SETUGT: // flipped
5532 case ISD::SETULT:
5533 case ISD::SETLT: return X86::COND_B;
5534 case ISD::SETUGE: // flipped
5535 case ISD::SETULE:
5536 case ISD::SETLE: return X86::COND_BE;
5537 case ISD::SETONE:
5538 case ISD::SETNE: return X86::COND_NE;
5539 case ISD::SETUO: return X86::COND_P;
5540 case ISD::SETO: return X86::COND_NP;
5541 case ISD::SETOEQ:
5542 case ISD::SETUNE: return X86::COND_INVALID;
5543 }
5544}
5545
5546/// Is there a floating point cmov for the specific X86 condition code?
5547/// Current x86 isa includes the following FP cmov instructions:
5548 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5549static bool hasFPCMov(unsigned X86CC) {
5550 switch (X86CC) {
5551 default:
5552 return false;
5553 case X86::COND_B:
5554 case X86::COND_BE:
5555 case X86::COND_E:
5556 case X86::COND_P:
5557 case X86::COND_A:
5558 case X86::COND_AE:
5559 case X86::COND_NE:
5560 case X86::COND_NP:
5561 return true;
5562 }
5563}
5564
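// VPTERNLOG has 512-bit forms with AVX512F and 128/256-bit forms with VLX;
// a narrower type can also use it if the operation may be widened to 512 bits
// (canExtendTo512DQ).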
5565static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
5566 return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
5567 VT.is512BitVector();
5568}
5569
5570bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5571 const CallInst &I,
5572 MachineFunction &MF,
5573 unsigned Intrinsic) const {
5574 Info.flags = MachineMemOperand::MONone;
5575 Info.offset = 0;
5576
5577 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5578 if (!IntrData) {
5579 switch (Intrinsic) {
5580 case Intrinsic::x86_aesenc128kl:
5581 case Intrinsic::x86_aesdec128kl:
5582 Info.opc = ISD::INTRINSIC_W_CHAIN;
5583 Info.ptrVal = I.getArgOperand(1);
5584 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5585 Info.align = Align(1);
5586 Info.flags |= MachineMemOperand::MOLoad;
5587 return true;
5588 case Intrinsic::x86_aesenc256kl:
5589 case Intrinsic::x86_aesdec256kl:
5590 Info.opc = ISD::INTRINSIC_W_CHAIN;
5591 Info.ptrVal = I.getArgOperand(1);
5592 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5593 Info.align = Align(1);
5594 Info.flags |= MachineMemOperand::MOLoad;
5595 return true;
5596 case Intrinsic::x86_aesencwide128kl:
5597 case Intrinsic::x86_aesdecwide128kl:
5598 Info.opc = ISD::INTRINSIC_W_CHAIN;
5599 Info.ptrVal = I.getArgOperand(0);
5600 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5601 Info.align = Align(1);
5602 Info.flags |= MachineMemOperand::MOLoad;
5603 return true;
5604 case Intrinsic::x86_aesencwide256kl:
5605 case Intrinsic::x86_aesdecwide256kl:
5606 Info.opc = ISD::INTRINSIC_W_CHAIN;
5607 Info.ptrVal = I.getArgOperand(0);
5608 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5609 Info.align = Align(1);
5610 Info.flags |= MachineMemOperand::MOLoad;
5611 return true;
5612 case Intrinsic::x86_atomic_bts:
5613 case Intrinsic::x86_atomic_btc:
5614 case Intrinsic::x86_atomic_btr: {
5615 Info.opc = ISD::INTRINSIC_W_CHAIN;
5616 Info.ptrVal = I.getArgOperand(0);
5617 unsigned Size = I.getType()->getScalarSizeInBits();
5618 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5619 Info.align = Align(Size);
5620 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5621 MachineMemOperand::MOVolatile;
5622 return true;
5623 }
5624 }
5625 return false;
5626 }
5627
5628 switch (IntrData->Type) {
5629 case TRUNCATE_TO_MEM_VI8:
5630 case TRUNCATE_TO_MEM_VI16:
5631 case TRUNCATE_TO_MEM_VI32: {
5632 Info.opc = ISD::INTRINSIC_VOID;
5633 Info.ptrVal = I.getArgOperand(0);
5634 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5635 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5636 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5637 ScalarVT = MVT::i8;
5638 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5639 ScalarVT = MVT::i16;
5640 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5641 ScalarVT = MVT::i32;
5642
5643 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5644 Info.align = Align(1);
5645 Info.flags |= MachineMemOperand::MOStore;
5646 break;
5647 }
5648 case GATHER:
5649 case GATHER_AVX2: {
5650 Info.opc = ISD::INTRINSIC_W_CHAIN;
5651 Info.ptrVal = nullptr;
5652 MVT DataVT = MVT::getVT(I.getType());
5653 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
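    // The number of elements actually gathered is bounded by both the data
    // vector and the index vector widths.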
5654 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5655 IndexVT.getVectorNumElements());
5656 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5657 Info.align = Align(1);
5658 Info.flags |= MachineMemOperand::MOLoad;
5659 break;
5660 }
5661 case SCATTER: {
5662 Info.opc = ISD::INTRINSIC_VOID;
5663 Info.ptrVal = nullptr;
5664 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5665 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5666 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5667 IndexVT.getVectorNumElements());
5668 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5669 Info.align = Align(1);
5670 Info.flags |= MachineMemOperand::MOStore;
5671 break;
5672 }
5673 default:
5674 return false;
5675 }
5676
5677 return true;
5678}
5679
5680/// Returns true if the target can instruction select the
5681/// specified FP immediate natively. If false, the legalizer will
5682/// materialize the FP immediate as a load from a constant pool.
5683bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5684 bool ForCodeSize) const {
5685 for (const APFloat &FPImm : LegalFPImmediates)
5686 if (Imm.bitwiseIsEqual(FPImm))
5687 return true;
5688 return false;
5689}
5690
5691bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5692 ISD::LoadExtType ExtTy,
5693 EVT NewVT) const {
5694 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
5695
5696 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5697 // relocations target a movq or addq instruction: don't let the load shrink.
5698 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5699 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5700 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5701 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5702
5703 // If this is (1) an AVX vector load with (2) multiple uses and (3) all of
5704 // those uses are extracted directly into a store, then the extract + store
5705 // can be store-folded. Therefore, it's probably not worth splitting the load.
5706 EVT VT = Load->getValueType(0);
5707 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5708 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5709 // Skip uses of the chain value. Result 0 of the node is the load value.
5710 if (UI.getUse().getResNo() != 0)
5711 continue;
5712
5713 // If this use is not an extract + store, it's probably worth splitting.
5714 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5715 UI->use_begin()->getOpcode() != ISD::STORE)
5716 return true;
5717 }
5718 // All non-chain uses are extract + store.
5719 return false;
5720 }
5721
5722 return true;
5723}
5724
5725/// Returns true if it is beneficial to convert a load of a constant
5726/// to just the constant itself.
5727bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5728 Type *Ty) const {
5729 assert(Ty->isIntegerTy());
5730
5731 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5732 if (BitSize == 0 || BitSize > 64)
5733 return false;
5734 return true;
5735}
5736
5737bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5738 // If we are using XMM registers in the ABI and the condition of the select is
5739 // a floating-point compare and we have blendv or conditional move, then it is
5740 // cheaper to select instead of doing a cross-register move and creating a
5741 // load that depends on the compare result.
5742 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5743 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5744}
5745
5746bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5747 // TODO: It might be a win to ease or lift this restriction, but the generic
5748 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5749 if (VT.isVector() && Subtarget.hasAVX512())
5750 return false;
5751
5752 return true;
5753}
5754
5755bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5756 SDValue C) const {
5757 // TODO: We handle scalars using custom code, but generic combining could make
5758 // that unnecessary.
5759 APInt MulC;
5760 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5761 return false;
5762
5763 // Find the type this will be legalized to. Otherwise we might prematurely
5764 // convert this to shl+add/sub and then still have to type legalize those ops.
5765 // Another choice would be to defer the decision for illegal types until
5766 // after type legalization. But constant splat vectors of i64 can't make it
5767 // through type legalization on 32-bit targets so we would need to special
5768 // case vXi64.
5769 while (getTypeAction(Context, VT) != TypeLegal)
5770 VT = getTypeToTransformTo(Context, VT);
5771
5772 // If vector multiply is legal, assume that's faster than shl + add/sub.
5773 // Multiply is a complex op with higher latency and lower throughput in
5774 // most implementations; sub-vXi32 vector multiplies are always fast,
5775 // vXi32 must not have a slow PMULLD implementation, and anything larger
5776 // (vXi64) is always going to be slow.
5777 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5778 if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&
5779 (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
5780 return false;
5781
5782 // shl+add, shl+sub, shl+add+neg
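  //   C == 2^k - 1  ->  (x << k) - x        C == 2^k + 1    ->  (x << k) + x
  //   C == 1 - 2^k  ->  x - (x << k)        C == -(2^k + 1) ->  -((x << k) + x)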
5783 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5784 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5785}
5786
5787bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5788 unsigned Index) const {
5789 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5790 return false;
5791
5792 // Mask vectors support all subregister combinations and operations that
5793 // extract half of vector.
5794 if (ResVT.getVectorElementType() == MVT::i1)
5795 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5796 (Index == ResVT.getVectorNumElements()));
5797
5798 return (Index % ResVT.getVectorNumElements()) == 0;
5799}
5800
5801bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5802 unsigned Opc = VecOp.getOpcode();
5803
5804 // Assume target opcodes can't be scalarized.
5805 // TODO - do we have any exceptions?
5806 if (Opc >= ISD::BUILTIN_OP_END)
5807 return false;
5808
5809 // If the vector op is not supported, try to convert to scalar.
5810 EVT VecVT = VecOp.getValueType();
5811 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5812 return true;
5813
5814 // If the vector op is supported, but the scalar op is not, the transform may
5815 // not be worthwhile.
5816 EVT ScalarVT = VecVT.getScalarType();
5817 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5818}
5819
5820bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5821 bool) const {
5822 // TODO: Allow vectors?
5823 if (VT.isVector())
5824 return false;
5825 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5826}
5827
5828bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
5829 // Speculate cttz only if we can directly use TZCNT or can promote to i32.
5830 return Subtarget.hasBMI() ||
5831 (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);
5832}
5833
5834bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
5835 // Speculate ctlz only if we can directly use LZCNT.
5836 return Subtarget.hasLZCNT();
5837}
5838
5839bool X86TargetLowering::hasBitPreservingFPLogic(EVT VT) const {
5840 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
5841}
5842
5843bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
5844 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
5845 // expensive than a straight movsd. On the other hand, it's important to
5846 // shrink long double fp constant since fldt is very slow.
5847 return !Subtarget.hasSSE2() || VT == MVT::f80;
5848}
5849
5850bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const {
5851 return (VT == MVT::f64 && Subtarget.hasSSE2()) ||
5852 (VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;
5853}
5854
5855bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5856 const SelectionDAG &DAG,
5857 const MachineMemOperand &MMO) const {
5858 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5859 BitcastVT.getVectorElementType() == MVT::i1)
5860 return false;
5861
5862 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5863 return false;
5864
5865 // If both types are legal vectors, it's always ok to convert them.
5866 if (LoadVT.isVector() && BitcastVT.isVector() &&
5867 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5868 return true;
5869
5870 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5871}
5872
5873bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5874 const MachineFunction &MF) const {
5875 // If the NoImplicitFloat attribute is set, do not merge stores beyond the
5876 // native integer width, since wider merges would require float/vector values.
5877 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
5878
5879 if (NoFloat) {
5880 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5881 return (MemVT.getSizeInBits() <= MaxIntSize);
5882 }
5883 // Make sure we don't merge greater than our preferred vector
5884 // width.
5885 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5886 return false;
5887
5888 return true;
5889}
5890
5891bool X86TargetLowering::isCtlzFast() const {
5892 return Subtarget.hasFastLZCNT();
5893}
5894
5895bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5896 const Instruction &AndI) const {
5897 return true;
5898}
5899
5900bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5901 EVT VT = Y.getValueType();
5902
5903 if (VT.isVector())
5904 return false;
5905
5906 if (!Subtarget.hasBMI())
5907 return false;
5908
5909 // There are only 32-bit and 64-bit forms for 'andn'.
5910 if (VT != MVT::i32 && VT != MVT::i64)
5911 return false;
5912
5913 return !isa<ConstantSDNode>(Y);
5914}
5915
5916bool X86TargetLowering::hasAndNot(SDValue Y) const {
5917 EVT VT = Y.getValueType();
5918
5919 if (!VT.isVector())
5920 return hasAndNotCompare(Y);
5921
5922 // Vector.
5923
5924 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5925 return false;
5926
5927 if (VT == MVT::v4i32)
5928 return true;
5929
5930 return Subtarget.hasSSE2();
5931}
5932
5933bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5934 return X.getValueType().isScalarInteger(); // 'bt'
5935}
5936
5937bool X86TargetLowering::
5938 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5939 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5940 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5941 SelectionDAG &DAG) const {
5942 // Does baseline recommend not to perform the fold by default?
5943 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5944 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5945 return false;
5946 // For scalars this transform is always beneficial.
5947 if (X.getValueType().isScalarInteger())
5948 return true;
5949 // If all the shift amounts are identical, then transform is beneficial even
5950 // with rudimentary SSE2 shifts.
5951 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5952 return true;
5953 // If we have AVX2 with its powerful shift operations, then it's also good.
5954 if (Subtarget.hasAVX2())
5955 return true;
5956 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5957 re