Bug Summary

File: /build/source/llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 44894, column 39
The result of the left shift is undefined due to shifting by '4294967291', which is greater or equal to the width of type 'int'
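
The shift amount 4294967291 is 2^32 - 5, i.e. the value -5 wrapped to a 32-bit unsigned integer, which is the typical way an out-of-range shift count like this arises. The snippet below is a hypothetical, minimal illustration of the pattern the core checker flags; it is not the code at line 44894 (which lies outside the excerpt shown here), and all names and values in it are assumptions for the example only.

// Hypothetical example of the diagnosed pattern, not the actual LLVM code.
#include <cstdint>
#include <iostream>

int main() {
  int Amt = -5;                                // a miscomputed shift count
  uint32_t UAmt = static_cast<uint32_t>(Amt);  // wraps to 4294967291

  int Value = 1;
  // An unguarded 'Value << UAmt' would be undefined behavior, because UAmt
  // (4294967291) is >= 32, the bit width of 'int'. Guarding the count keeps
  // the shift well defined.
  int Shifted = (UAmt < 32u) ? (Value << UAmt) : 0;

  std::cout << UAmt << ' ' << Shifted << '\n'; // prints: 4294967291 0
  return 0;
}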

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/X86 -I /build/source/llvm/lib/Target/X86 -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm=build-llvm -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm=build-llvm -fcoverage-prefix-map=/build/source/= -source-date-epoch 1679443490 -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-03-22-005342-16304-1 -x c++ /build/source/llvm/lib/Target/X86/X86ISelLowering.cpp
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/ObjCARCUtil.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/IntrinsicLowering.h"
34#include "llvm/CodeGen/MachineFrameInfo.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineJumpTableInfo.h"
38#include "llvm/CodeGen/MachineLoopInfo.h"
39#include "llvm/CodeGen/MachineModuleInfo.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/TargetLowering.h"
42#include "llvm/CodeGen/WinEHFuncInfo.h"
43#include "llvm/IR/CallingConv.h"
44#include "llvm/IR/Constants.h"
45#include "llvm/IR/DerivedTypes.h"
46#include "llvm/IR/DiagnosticInfo.h"
47#include "llvm/IR/EHPersonalities.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalVariable.h"
51#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Instructions.h"
53#include "llvm/IR/Intrinsics.h"
54#include "llvm/IR/PatternMatch.h"
55#include "llvm/MC/MCAsmInfo.h"
56#include "llvm/MC/MCContext.h"
57#include "llvm/MC/MCExpr.h"
58#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/KnownBits.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Target/TargetOptions.h"
65#include <algorithm>
66#include <bitset>
67#include <cctype>
68#include <numeric>
69using namespace llvm;
70
71#define DEBUG_TYPE "x86-isel"
72
73STATISTIC(NumTailCalls, "Number of tail calls");
74
75static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
76 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
77 cl::desc(
78 "Sets the preferable loop alignment for experiments (as log2 bytes) "
79 "for innermost loops only. If specified, this option overrides "
80 "alignment set by x86-experimental-pref-loop-alignment."),
81 cl::Hidden);
82
83static cl::opt<bool> MulConstantOptimization(
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden);
88
89static cl::opt<bool> ExperimentalUnorderedISEL(
90 "x86-experimental-unordered-atomic-isel", cl::init(false),
91 cl::desc("Use LoadSDNode and StoreSDNode instead of "
92 "AtomicSDNode for unordered atomic loads and "
93 "stores respectively."),
94 cl::Hidden);
95
96/// Call this when the user attempts to do something unsupported, like
97/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
98/// report_fatal_error, so calling code should attempt to recover without
99/// crashing.
100static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
101 const char *Msg) {
102 MachineFunction &MF = DAG.getMachineFunction();
103 DAG.getContext()->diagnose(
104 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
105}
106
107/// Returns true if a CC can dynamically exclude a register from the list of
108/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
109/// the return registers.
110static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
111 switch (CC) {
112 default:
113 return false;
114 case CallingConv::X86_RegCall:
115 case CallingConv::PreserveMost:
116 case CallingConv::PreserveAll:
117 return true;
118 }
119}
120
121/// Returns true if a CC can dynamically exclude a register from the list of
122/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
123/// the parameters.
124static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
125 return CC == CallingConv::X86_RegCall;
126}
127
128X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
129 const X86Subtarget &STI)
130 : TargetLowering(TM), Subtarget(STI) {
131 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
132 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
133
134 // Set up the TargetLowering object.
135
136 // X86 is weird. It always uses i8 for shift amounts and setcc results.
137 setBooleanContents(ZeroOrOneBooleanContent);
138 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
139 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
140
141 // For 64-bit, since we have so many registers, use the ILP scheduler.
142 // For 32-bit, use the register pressure specific scheduling.
143 // For Atom, always use ILP scheduling.
144 if (Subtarget.isAtom())
145 setSchedulingPreference(Sched::ILP);
146 else if (Subtarget.is64Bit())
147 setSchedulingPreference(Sched::ILP);
148 else
149 setSchedulingPreference(Sched::RegPressure);
150 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
151 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
152
153 // Bypass expensive divides and use cheaper ones.
154 if (TM.getOptLevel() >= CodeGenOpt::Default) {
155 if (Subtarget.hasSlowDivide32())
156 addBypassSlowDiv(32, 8);
157 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
158 addBypassSlowDiv(64, 32);
159 }
160
161 // Setup Windows compiler runtime calls.
162 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
163 static const struct {
164 const RTLIB::Libcall Op;
165 const char * const Name;
166 const CallingConv::ID CC;
167 } LibraryCalls[] = {
168 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
169 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
170 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
171 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
172 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
173 };
174
175 for (const auto &LC : LibraryCalls) {
176 setLibcallName(LC.Op, LC.Name);
177 setLibcallCallingConv(LC.Op, LC.CC);
178 }
179 }
180
181 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
182 // MSVCRT doesn't have powi; fall back to pow
183 setLibcallName(RTLIB::POWI_F32, nullptr);
184 setLibcallName(RTLIB::POWI_F64, nullptr);
185 }
186
187 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
188 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
189 // FIXME: Should we be limiting the atomic size on other configs? Default is
190 // 1024.
191 if (!Subtarget.canUseCMPXCHG8B())
192 setMaxAtomicSizeInBitsSupported(32);
193
194 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
195
196 setMaxLargeFPConvertBitWidthSupported(128);
197
198 // Set up the register classes.
199 addRegisterClass(MVT::i8, &X86::GR8RegClass);
200 addRegisterClass(MVT::i16, &X86::GR16RegClass);
201 addRegisterClass(MVT::i32, &X86::GR32RegClass);
202 if (Subtarget.is64Bit())
203 addRegisterClass(MVT::i64, &X86::GR64RegClass);
204
205 for (MVT VT : MVT::integer_valuetypes())
206 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
207
208 // We don't accept any truncstore of integer registers.
209 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
210 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
211 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
212 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
213 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
214 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
215
216 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
217
218 // SETOEQ and SETUNE require checking two conditions.
219 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
220 setCondCodeAction(ISD::SETOEQ, VT, Expand);
221 setCondCodeAction(ISD::SETUNE, VT, Expand);
222 }
223
224 // Integer absolute.
225 if (Subtarget.canUseCMOV()) {
226 setOperationAction(ISD::ABS , MVT::i16 , Custom);
227 setOperationAction(ISD::ABS , MVT::i32 , Custom);
228 if (Subtarget.is64Bit())
229 setOperationAction(ISD::ABS , MVT::i64 , Custom);
230 }
231
232 // Signed saturation subtraction.
233 setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
234 setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
235 setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
236 if (Subtarget.is64Bit())
237 setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
238
239 // Funnel shifts.
240 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
241 // For slow shld targets we only lower for code size.
242 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
243
244 setOperationAction(ShiftOp , MVT::i8 , Custom);
245 setOperationAction(ShiftOp , MVT::i16 , Custom);
246 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
247 if (Subtarget.is64Bit())
248 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
249 }
250
251 if (!Subtarget.useSoftFloat()) {
252 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
253 // operation.
254 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
255 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
256 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
257 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
258 // We have an algorithm for SSE2, and we turn this into a 64-bit
259 // FILD or VCVTUSI2SS/SD for other targets.
260 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
261 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
262 // We have an algorithm for SSE2->double, and we turn this into a
263 // 64-bit FILD followed by conditional FADD for other targets.
264 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
265 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
266
267 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
268 // this operation.
269 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
270 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
271 // SSE has no i16 to fp conversion, only i32. We promote in the handler
272 // to allow f80 to use i16 and f64 to use i16 with sse1 only
273 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
274 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
275 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
276 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
277 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
278 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
279 // are Legal, f80 is custom lowered.
280 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
281 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
282
283 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
284 // this operation.
285 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
286 // FIXME: This doesn't generate invalid exception when it should. PR44019.
287 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
288 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
289 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
290 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
291 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
292 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
293 // are Legal, f80 is custom lowered.
294 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
295 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
296
297 // Handle FP_TO_UINT by promoting the destination to a larger signed
298 // conversion.
299 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
300 // FIXME: This doesn't generate invalid exception when it should. PR44019.
301 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
302 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
303 // FIXME: This doesn't generate invalid exception when it should. PR44019.
304 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
305 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
306 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
307 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
308 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
309
310 setOperationAction(ISD::LRINT, MVT::f32, Custom);
311 setOperationAction(ISD::LRINT, MVT::f64, Custom);
312 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
313 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
314
315 if (!Subtarget.is64Bit()) {
316 setOperationAction(ISD::LRINT, MVT::i64, Custom);
317 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
318 }
319 }
320
321 if (Subtarget.hasSSE2()) {
322 // Custom lowering for saturating float to int conversions.
323 // We handle promotion to larger result types manually.
324 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
325 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
326 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
327 }
328 if (Subtarget.is64Bit()) {
329 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
330 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
331 }
332 }
333
334 // Handle address space casts between mixed sized pointers.
335 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
336 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
337
338 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
339 if (!Subtarget.hasSSE2()) {
340 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
341 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
342 if (Subtarget.is64Bit()) {
343 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
344 // Without SSE, i64->f64 goes through memory.
345 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
346 }
347 } else if (!Subtarget.is64Bit())
348 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
349
350 // Scalar integer divide and remainder are lowered to use operations that
351 // produce two results, to match the available instructions. This exposes
352 // the two-result form to trivial CSE, which is able to combine x/y and x%y
353 // into a single instruction.
354 //
355 // Scalar integer multiply-high is also lowered to use two-result
356 // operations, to match the available instructions. However, plain multiply
357 // (low) operations are left as Legal, as there are single-result
358 // instructions for this in x86. Using the two-result multiply instructions
359 // when both high and low results are needed must be arranged by dagcombine.
360 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
361 setOperationAction(ISD::MULHS, VT, Expand);
362 setOperationAction(ISD::MULHU, VT, Expand);
363 setOperationAction(ISD::SDIV, VT, Expand);
364 setOperationAction(ISD::UDIV, VT, Expand);
365 setOperationAction(ISD::SREM, VT, Expand);
366 setOperationAction(ISD::UREM, VT, Expand);
367 }
368
369 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
370 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
371 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
372 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
373 setOperationAction(ISD::BR_CC, VT, Expand);
374 setOperationAction(ISD::SELECT_CC, VT, Expand);
375 }
376 if (Subtarget.is64Bit())
377 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
378 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
379 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
380 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
381
382 setOperationAction(ISD::FREM , MVT::f32 , Expand);
383 setOperationAction(ISD::FREM , MVT::f64 , Expand);
384 setOperationAction(ISD::FREM , MVT::f80 , Expand);
385 setOperationAction(ISD::FREM , MVT::f128 , Expand);
386
387 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
388 setOperationAction(ISD::GET_ROUNDING , MVT::i32 , Custom);
389 setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
390 }
391
392 // Promote the i8 variants and force them on up to i32 which has a shorter
393 // encoding.
394 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
395 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
396 // Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
397 // a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
398 // promote that too.
399 setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
400 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32);
401
402 if (!Subtarget.hasBMI()) {
403 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
404 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
405 if (Subtarget.is64Bit()) {
406 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
407 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
408 }
409 }
410
411 if (Subtarget.hasLZCNT()) {
412 // When promoting the i8 variants, force them to i32 for a shorter
413 // encoding.
414 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
415 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
416 } else {
417 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
418 if (VT == MVT::i64 && !Subtarget.is64Bit())
419 continue;
420 setOperationAction(ISD::CTLZ , VT, Custom);
421 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
422 }
423 }
424
425 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
426 ISD::STRICT_FP_TO_FP16}) {
427 // Special handling for half-precision floating point conversions.
428 // If we don't have F16C support, then lower half float conversions
429 // into library calls.
430 setOperationAction(
431 Op, MVT::f32,
432 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
433 // There's never any support for operations beyond MVT::f32.
434 setOperationAction(Op, MVT::f64, Expand);
435 setOperationAction(Op, MVT::f80, Expand);
436 setOperationAction(Op, MVT::f128, Expand);
437 }
438
439 for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
440 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
441 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
442 setTruncStoreAction(VT, MVT::f16, Expand);
443 setTruncStoreAction(VT, MVT::bf16, Expand);
444
445 setOperationAction(ISD::BF16_TO_FP, VT, Expand);
446 setOperationAction(ISD::FP_TO_BF16, VT, Custom);
447 }
448
449 setOperationAction(ISD::PARITY, MVT::i8, Custom);
450 setOperationAction(ISD::PARITY, MVT::i16, Custom);
451 setOperationAction(ISD::PARITY, MVT::i32, Custom);
452 if (Subtarget.is64Bit())
453 setOperationAction(ISD::PARITY, MVT::i64, Custom);
454 if (Subtarget.hasPOPCNT()) {
455 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
456 // popcntw is longer to encode than popcntl and also has a false dependency
457 // on the dest that popcntl hasn't had since Cannon Lake.
458 setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
459 } else {
460 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
461 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
462 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
463 if (Subtarget.is64Bit())
464 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
465 else
466 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
467 }
468
469 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
470
471 if (!Subtarget.hasMOVBE())
472 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
473
474 // X86 wants to expand cmov itself.
475 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
476 setOperationAction(ISD::SELECT, VT, Custom);
477 setOperationAction(ISD::SETCC, VT, Custom);
478 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
479 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
480 }
481 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
482 if (VT == MVT::i64 && !Subtarget.is64Bit())
483 continue;
484 setOperationAction(ISD::SELECT, VT, Custom);
485 setOperationAction(ISD::SETCC, VT, Custom);
486 }
487
488 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
489 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
490 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
491
492 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
493 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
494 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
495 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
496 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
497 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
498 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
499 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
500
501 // Darwin ABI issue.
502 for (auto VT : { MVT::i32, MVT::i64 }) {
503 if (VT == MVT::i64 && !Subtarget.is64Bit())
504 continue;
505 setOperationAction(ISD::ConstantPool , VT, Custom);
506 setOperationAction(ISD::JumpTable , VT, Custom);
507 setOperationAction(ISD::GlobalAddress , VT, Custom);
508 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
509 setOperationAction(ISD::ExternalSymbol , VT, Custom);
510 setOperationAction(ISD::BlockAddress , VT, Custom);
511 }
512
513 // 64-bit shl, sra, srl (iff 32-bit x86)
514 for (auto VT : { MVT::i32, MVT::i64 }) {
515 if (VT == MVT::i64 && !Subtarget.is64Bit())
516 continue;
517 setOperationAction(ISD::SHL_PARTS, VT, Custom);
518 setOperationAction(ISD::SRA_PARTS, VT, Custom);
519 setOperationAction(ISD::SRL_PARTS, VT, Custom);
520 }
521
522 if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
523 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
524
525 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
526
527 // Expand certain atomics
528 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
529 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
530 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
531 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
532 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
533 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
534 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
535 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
536 }
537
538 if (!Subtarget.is64Bit())
539 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
540
541 if (Subtarget.canUseCMPXCHG16B())
542 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
543
544 // FIXME - use subtarget debug flags
545 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
546 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
547 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
548 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
549 }
550
551 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
552 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
553
554 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
555 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
556
557 setOperationAction(ISD::TRAP, MVT::Other, Legal);
558 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
559 if (Subtarget.isTargetPS())
560 setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
561 else
562 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
563
564 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
565 setOperationAction(ISD::VASTART , MVT::Other, Custom);
566 setOperationAction(ISD::VAEND , MVT::Other, Expand);
567 bool Is64Bit = Subtarget.is64Bit();
568 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
569 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
570
571 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
572 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
573
574 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
575
576 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
577 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
578 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
579
580 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
581
582 auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
583 setOperationAction(ISD::FABS, VT, Action);
584 setOperationAction(ISD::FNEG, VT, Action);
585 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
586 setOperationAction(ISD::FREM, VT, Action);
587 setOperationAction(ISD::FMA, VT, Action);
588 setOperationAction(ISD::FMINNUM, VT, Action);
589 setOperationAction(ISD::FMAXNUM, VT, Action);
590 setOperationAction(ISD::FMINIMUM, VT, Action);
591 setOperationAction(ISD::FMAXIMUM, VT, Action);
592 setOperationAction(ISD::FSIN, VT, Action);
593 setOperationAction(ISD::FCOS, VT, Action);
594 setOperationAction(ISD::FSINCOS, VT, Action);
595 setOperationAction(ISD::FSQRT, VT, Action);
596 setOperationAction(ISD::FPOW, VT, Action);
597 setOperationAction(ISD::FLOG, VT, Action);
598 setOperationAction(ISD::FLOG2, VT, Action);
599 setOperationAction(ISD::FLOG10, VT, Action);
600 setOperationAction(ISD::FEXP, VT, Action);
601 setOperationAction(ISD::FEXP2, VT, Action);
602 setOperationAction(ISD::FCEIL, VT, Action);
603 setOperationAction(ISD::FFLOOR, VT, Action);
604 setOperationAction(ISD::FNEARBYINT, VT, Action);
605 setOperationAction(ISD::FRINT, VT, Action);
606 setOperationAction(ISD::BR_CC, VT, Action);
607 setOperationAction(ISD::SETCC, VT, Action);
608 setOperationAction(ISD::SELECT, VT, Custom);
609 setOperationAction(ISD::SELECT_CC, VT, Action);
610 setOperationAction(ISD::FROUND, VT, Action);
611 setOperationAction(ISD::FROUNDEVEN, VT, Action);
612 setOperationAction(ISD::FTRUNC, VT, Action);
613 };
614
615 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
616 // f16, f32 and f64 use SSE.
617 // Set up the FP register classes.
618 addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
619 : &X86::FR16RegClass);
620 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
621 : &X86::FR32RegClass);
622 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
623 : &X86::FR64RegClass);
624
625 // Disable f32->f64 extload as we can only generate this in one instruction
626 // under optsize. So it's easier to pattern match (fpext (load)) for that
627 // case instead of needing to emit 2 instructions for extload in the
628 // non-optsize case.
629 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
630
631 for (auto VT : { MVT::f32, MVT::f64 }) {
632 // Use ANDPD to simulate FABS.
633 setOperationAction(ISD::FABS, VT, Custom);
634
635 // Use XORP to simulate FNEG.
636 setOperationAction(ISD::FNEG, VT, Custom);
637
638 // Use ANDPD and ORPD to simulate FCOPYSIGN.
639 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
640
641 // These might be better off as horizontal vector ops.
642 setOperationAction(ISD::FADD, VT, Custom);
643 setOperationAction(ISD::FSUB, VT, Custom);
644
645 // We don't support sin/cos/fmod
646 setOperationAction(ISD::FSIN , VT, Expand);
647 setOperationAction(ISD::FCOS , VT, Expand);
648 setOperationAction(ISD::FSINCOS, VT, Expand);
649 }
650
651 // Half type will be promoted by default.
652 setF16Action(MVT::f16, Promote);
653 setOperationAction(ISD::FADD, MVT::f16, Promote);
654 setOperationAction(ISD::FSUB, MVT::f16, Promote);
655 setOperationAction(ISD::FMUL, MVT::f16, Promote);
656 setOperationAction(ISD::FDIV, MVT::f16, Promote);
657 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
658 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
659 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
660
661 setOperationAction(ISD::STRICT_FADD, MVT::f16, Promote);
662 setOperationAction(ISD::STRICT_FSUB, MVT::f16, Promote);
663 setOperationAction(ISD::STRICT_FMUL, MVT::f16, Promote);
664 setOperationAction(ISD::STRICT_FDIV, MVT::f16, Promote);
665 setOperationAction(ISD::STRICT_FMA, MVT::f16, Promote);
666 setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Promote);
667 setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Promote);
668 setOperationAction(ISD::STRICT_FMINIMUM, MVT::f16, Promote);
669 setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote);
670 setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote);
671 setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote);
672 setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote);
673 setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote);
674 setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote);
675 setOperationAction(ISD::STRICT_FEXP, MVT::f16, Promote);
676 setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote);
677 setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote);
678 setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote);
679 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote);
680 setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote);
681 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote);
682 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote);
683 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
684 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
685 setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
686 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
687 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
688 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
689
690 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
691 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
692
693 // Lower this to MOVMSK plus an AND.
694 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
695 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
696
697 } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
698 (UseX87 || Is64Bit)) {
699 // Use SSE for f32, x87 for f64.
700 // Set up the FP register classes.
701 addRegisterClass(MVT::f32, &X86::FR32RegClass);
702 if (UseX87)
703 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
704
705 // Use ANDPS to simulate FABS.
706 setOperationAction(ISD::FABS , MVT::f32, Custom);
707
708 // Use XORP to simulate FNEG.
709 setOperationAction(ISD::FNEG , MVT::f32, Custom);
710
711 if (UseX87)
712 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
713
714 // Use ANDPS and ORPS to simulate FCOPYSIGN.
715 if (UseX87)
716 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
717 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
718
719 // We don't support sin/cos/fmod
720 setOperationAction(ISD::FSIN , MVT::f32, Expand);
721 setOperationAction(ISD::FCOS , MVT::f32, Expand);
722 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
723
724 if (UseX87) {
725 // Always expand sin/cos functions even though x87 has an instruction.
726 setOperationAction(ISD::FSIN, MVT::f64, Expand);
727 setOperationAction(ISD::FCOS, MVT::f64, Expand);
728 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
729 }
730 } else if (UseX87) {
731 // f32 and f64 in x87.
732 // Set up the FP register classes.
733 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
734 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
735
736 for (auto VT : { MVT::f32, MVT::f64 }) {
737 setOperationAction(ISD::UNDEF, VT, Expand);
738 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
739
740 // Always expand sin/cos functions even though x87 has an instruction.
741 setOperationAction(ISD::FSIN , VT, Expand);
742 setOperationAction(ISD::FCOS , VT, Expand);
743 setOperationAction(ISD::FSINCOS, VT, Expand);
744 }
745 }
746
747 // Expand FP32 immediates into loads from the stack, save special cases.
748 if (isTypeLegal(MVT::f32)) {
749 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
750 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
751 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
752 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
753 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
754 } else // SSE immediates.
755 addLegalFPImmediate(APFloat(+0.0f)); // xorps
756 }
757 // Expand FP64 immediates into loads from the stack, save special cases.
758 if (isTypeLegal(MVT::f64)) {
759 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
760 addLegalFPImmediate(APFloat(+0.0)); // FLD0
761 addLegalFPImmediate(APFloat(+1.0)); // FLD1
762 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
763 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
764 } else // SSE immediates.
765 addLegalFPImmediate(APFloat(+0.0)); // xorpd
766 }
767 // Support fp16 0 immediate.
768 if (isTypeLegal(MVT::f16))
769 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
770
771 // Handle constrained floating-point operations of scalar.
772 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
773 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
774 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
775 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
776 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
777 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
778 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
779 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
780 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
781 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
782 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
783 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
784
785 // We don't support FMA.
786 setOperationAction(ISD::FMA, MVT::f64, Expand);
787 setOperationAction(ISD::FMA, MVT::f32, Expand);
788
789 // f80 always uses X87.
790 if (UseX87) {
791 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
792 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
793 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
794 {
795 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
796 addLegalFPImmediate(TmpFlt); // FLD0
797 TmpFlt.changeSign();
798 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
799
800 bool ignored;
801 APFloat TmpFlt2(+1.0);
802 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
803 &ignored);
804 addLegalFPImmediate(TmpFlt2); // FLD1
805 TmpFlt2.changeSign();
806 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
807 }
808
809 // Always expand sin/cos functions even though x87 has an instruction.
810 setOperationAction(ISD::FSIN , MVT::f80, Expand);
811 setOperationAction(ISD::FCOS , MVT::f80, Expand);
812 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
813
814 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
815 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
816 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
817 setOperationAction(ISD::FRINT, MVT::f80, Expand);
818 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
819 setOperationAction(ISD::FMA, MVT::f80, Expand);
820 setOperationAction(ISD::LROUND, MVT::f80, Expand);
821 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
822 setOperationAction(ISD::LRINT, MVT::f80, Custom);
823 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
824
825 // Handle constrained floating-point operations of scalar.
826 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
827 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
828 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
829 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
830 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
831 if (isTypeLegal(MVT::f16)) {
832 setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
833 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
834 } else {
835 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
836 }
837 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
838 // as Custom.
839 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
840 }
841
842 // f128 uses xmm registers, but most operations require libcalls.
843 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
844 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
845 : &X86::VR128RegClass);
846
847 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
848
849 setOperationAction(ISD::FADD, MVT::f128, LibCall);
850 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
851 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
852 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
853 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
854 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
855 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
856 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
857 setOperationAction(ISD::FMA, MVT::f128, LibCall);
858 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
859
860 setOperationAction(ISD::FABS, MVT::f128, Custom);
861 setOperationAction(ISD::FNEG, MVT::f128, Custom);
862 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
863
864 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
865 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
866 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
867 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
868 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
869 // No STRICT_FSINCOS
870 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
871 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
872
873 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
874 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
875 // We need to custom handle any FP_ROUND with an f128 input, but
876 // LegalizeDAG uses the result type to know when to run a custom handler.
877 // So we have to list all legal floating point result types here.
878 if (isTypeLegal(MVT::f32)) {
879 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
880 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
881 }
882 if (isTypeLegal(MVT::f64)) {
883 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
884 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
885 }
886 if (isTypeLegal(MVT::f80)) {
887 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
888 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
889 }
890
891 setOperationAction(ISD::SETCC, MVT::f128, Custom);
892
893 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
894 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
895 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
896 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
897 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
898 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
899 }
900
901 // Always use a library call for pow.
902 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
903 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
904 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
905 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
906
907 setOperationAction(ISD::FLOG, MVT::f80, Expand);
908 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
909 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
910 setOperationAction(ISD::FEXP, MVT::f80, Expand);
911 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
912 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
913 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
914
915 // Some FP actions are always expanded for vector types.
916 for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
917 MVT::v4f32, MVT::v8f32, MVT::v16f32,
918 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
919 setOperationAction(ISD::FSIN, VT, Expand);
920 setOperationAction(ISD::FSINCOS, VT, Expand);
921 setOperationAction(ISD::FCOS, VT, Expand);
922 setOperationAction(ISD::FREM, VT, Expand);
923 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
924 setOperationAction(ISD::FPOW, VT, Expand);
925 setOperationAction(ISD::FLOG, VT, Expand);
926 setOperationAction(ISD::FLOG2, VT, Expand);
927 setOperationAction(ISD::FLOG10, VT, Expand);
928 setOperationAction(ISD::FEXP, VT, Expand);
929 setOperationAction(ISD::FEXP2, VT, Expand);
930 }
931
932 // First set operation action for all vector types to either promote
933 // (for widening) or expand (for scalarization). Then we will selectively
934 // turn on ones that can be effectively codegen'd.
935 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
936 setOperationAction(ISD::SDIV, VT, Expand);
937 setOperationAction(ISD::UDIV, VT, Expand);
938 setOperationAction(ISD::SREM, VT, Expand);
939 setOperationAction(ISD::UREM, VT, Expand);
940 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
941 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
942 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
943 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
944 setOperationAction(ISD::FMA, VT, Expand);
945 setOperationAction(ISD::FFLOOR, VT, Expand);
946 setOperationAction(ISD::FCEIL, VT, Expand);
947 setOperationAction(ISD::FTRUNC, VT, Expand);
948 setOperationAction(ISD::FRINT, VT, Expand);
949 setOperationAction(ISD::FNEARBYINT, VT, Expand);
950 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
951 setOperationAction(ISD::MULHS, VT, Expand);
952 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
953 setOperationAction(ISD::MULHU, VT, Expand);
954 setOperationAction(ISD::SDIVREM, VT, Expand);
955 setOperationAction(ISD::UDIVREM, VT, Expand);
956 setOperationAction(ISD::CTPOP, VT, Expand);
957 setOperationAction(ISD::CTTZ, VT, Expand);
958 setOperationAction(ISD::CTLZ, VT, Expand);
959 setOperationAction(ISD::ROTL, VT, Expand);
960 setOperationAction(ISD::ROTR, VT, Expand);
961 setOperationAction(ISD::BSWAP, VT, Expand);
962 setOperationAction(ISD::SETCC, VT, Expand);
963 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
964 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
965 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
966 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
967 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
968 setOperationAction(ISD::TRUNCATE, VT, Expand);
969 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
970 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
971 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
972 setOperationAction(ISD::SELECT_CC, VT, Expand);
973 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
974 setTruncStoreAction(InnerVT, VT, Expand);
975
976 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
977 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
978
979 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
980 // types, we have to deal with them whether we ask for Expansion or not.
981 // Setting Expand causes its own optimisation problems though, so leave
982 // them legal.
983 if (VT.getVectorElementType() == MVT::i1)
984 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
985
986 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
987 // split/scalarized right now.
988 if (VT.getVectorElementType() == MVT::f16 ||
989 VT.getVectorElementType() == MVT::bf16)
990 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
991 }
992 }
993
994 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
995 // with -msoft-float, disable use of MMX as well.
996 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
997 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
998 // No operations on x86mmx supported, everything uses intrinsics.
999 }
1000
1001 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
1002 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1003 : &X86::VR128RegClass);
1004
1005 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
1006 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
1007 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
1008 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
1009 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
1010 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
1011 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
1012 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
1013
1014 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
1015 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
1016
1017 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1018 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1019 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1020 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1021 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1022 }
1023
1024 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
1025 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1026 : &X86::VR128RegClass);
1027
1028 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
1029 // registers cannot be used even for integer operations.
1030 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
1031 : &X86::VR128RegClass);
1032 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1033 : &X86::VR128RegClass);
1034 addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1035 : &X86::VR128RegClass);
1036 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1037 : &X86::VR128RegClass);
1038 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1039 : &X86::VR128RegClass);
1040
1041 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
1042 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
1043 setOperationAction(ISD::SDIV, VT, Custom);
1044 setOperationAction(ISD::SREM, VT, Custom);
1045 setOperationAction(ISD::UDIV, VT, Custom);
1046 setOperationAction(ISD::UREM, VT, Custom);
1047 }
1048
1049 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
1050 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
1051 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
1052
1053 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1054 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1055 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1056 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
1057 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
1058 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
1059 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
1060 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
1061 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
1062 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1063 setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
1064 setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
1065
1066 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
1067 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
1068 setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
1069
1070 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1071 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
1072 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
1073
1074 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1075 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
1076 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
1077 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
1078 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
1079 }
1080
1081 setOperationAction(ISD::ABDU, MVT::v16i8, Custom);
1082 setOperationAction(ISD::ABDS, MVT::v8i16, Custom);
1083
1084 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
1085 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
1086 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
1087 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
1088 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
1089 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
1090 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
1091 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
1092 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
1093 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
1094
1095 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1096 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1097 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1098 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1099
1100 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1101 setOperationAction(ISD::SETCC, VT, Custom);
1102 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1103 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1104 setOperationAction(ISD::CTPOP, VT, Custom);
1105 setOperationAction(ISD::ABS, VT, Custom);
1106
1107 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1108 // setcc all the way to isel and prefer SETGT in some isel patterns.
1109 setCondCodeAction(ISD::SETLT, VT, Custom);
1110 setCondCodeAction(ISD::SETLE, VT, Custom);
1111 }
1112
1113 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1114 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1115 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1116 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1117 setOperationAction(ISD::VSELECT, VT, Custom);
1118 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1119 }
1120
1121 for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
1122 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1123 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1124 setOperationAction(ISD::VSELECT, VT, Custom);
1125
1126 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1127 continue;
1128
1129 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1130 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1131 }
1132 setF16Action(MVT::v8f16, Expand);
1133 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
1134 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
1135 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
1136 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
1137
1138 // Custom lower v2i64 and v2f64 selects.
1139 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1140 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1141 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
1142 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
1143 setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
1144 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
1145
1146 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
1147 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
1148 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1149 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1150 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
1151 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1152
1153 // Custom legalize these to avoid over promotion or custom promotion.
1154 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1155 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1156 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1157 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1158 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1159 }
1160
1161 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1162 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
1163 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1164 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1165
1166 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1167 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1168
1169 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1170 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1171
1172 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1173 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1174 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1175 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1176 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1177
1178 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1179 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1180 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1181 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1182
1183 // We want to legalize this to an f64 load rather than an i64 load on
1184 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1185 // store.
1186 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1187 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1188 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1189 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1190 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1191 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1192
1193 // Add 32-bit vector stores to help vectorization opportunities.
1194 setOperationAction(ISD::STORE, MVT::v2i16, Custom);
1195 setOperationAction(ISD::STORE, MVT::v4i8, Custom);
1196
1197 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1198 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1199 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1200 if (!Subtarget.hasAVX512())
1201 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1202
1203 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1204 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1205 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1206
1207 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1208
1209 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1210 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1211 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1212 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1213 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1214 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1215
1216 // In the customized shift lowering, the v4i32/v2i64 cases that are legal
1217 // in AVX2 will be recognized.
1218 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1219 setOperationAction(ISD::SRL, VT, Custom);
1220 setOperationAction(ISD::SHL, VT, Custom);
1221 setOperationAction(ISD::SRA, VT, Custom);
1222 if (VT == MVT::v2i64) continue;
1223 setOperationAction(ISD::ROTL, VT, Custom);
1224 setOperationAction(ISD::ROTR, VT, Custom);
1225 setOperationAction(ISD::FSHL, VT, Custom);
1226 setOperationAction(ISD::FSHR, VT, Custom);
1227 }
1228
1229 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1230 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1231 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1232 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1233 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1234 }
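// For reference, the LegalizeAction values used throughout this constructor
// roughly mean: Legal = the target handles the node natively on that type;
// Promote = perform the operation in a wider type; Expand = let the
// legalizer rewrite it in terms of other nodes or a libcall; Custom = route
// it through the target's LowerOperation hook.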
1235
1236 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1237 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1238 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1239 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1240 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1241 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1242 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1243 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1244 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1245
1246 // These might be better off as horizontal vector ops.
1247 setOperationAction(ISD::ADD, MVT::i16, Custom);
1248 setOperationAction(ISD::ADD, MVT::i32, Custom);
1249 setOperationAction(ISD::SUB, MVT::i16, Custom);
1250 setOperationAction(ISD::SUB, MVT::i32, Custom);
1251 }
1252
1253 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1254 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1255 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1256 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1257 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1258 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1259 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1260 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1261 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1262 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1263 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1264 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1265 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1266 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1267
1268 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1269 }
1270
1271 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1272 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1273 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1274 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1275 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1276 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1277 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1278 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1279
1280 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1281 setOperationAction(ISD::ABDS, VT, Custom);
1282 setOperationAction(ISD::ABDU, VT, Custom);
1283 }
1284
1285 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1286 setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
1287 setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
1288
1289 // FIXME: Do we need to handle scalar-to-vector here?
1290 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1291 setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
1292
1293 // We directly match byte blends in the backend as they match the VSELECT
1294 // condition form.
1295 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1296
1297 // SSE41 brings specific instructions for doing vector sign extend even in
1298 // cases where we don't have SRA.
1299 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1300 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1301 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1302 }
1303
1304 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1305 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1306 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1307 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1308 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1309 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1310 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1311 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1312 }
1313
1314 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1315 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1316 // do the pre and post work in the vector domain.
1317 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1318 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1319 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1320 // so that DAG combine doesn't try to turn it into uint_to_fp.
1321 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1322 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1323 }
1324 }
1325
1326 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1327 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1328 }
1329
1330 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1331 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1332 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1333 setOperationAction(ISD::ROTL, VT, Custom);
1334 setOperationAction(ISD::ROTR, VT, Custom);
1335 }
1336
1337 // XOP can efficiently perform BITREVERSE with VPPERM.
1338 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1339 setOperationAction(ISD::BITREVERSE, VT, Custom);
1340
1341 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1342 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1343 setOperationAction(ISD::BITREVERSE, VT, Custom);
1344 }
1345
1346 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1347 bool HasInt256 = Subtarget.hasInt256();
1348
1349 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1350 : &X86::VR256RegClass);
1351 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1352 : &X86::VR256RegClass);
1353 addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1354 : &X86::VR256RegClass);
1355 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1356 : &X86::VR256RegClass);
1357 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1358 : &X86::VR256RegClass);
1359 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1360 : &X86::VR256RegClass);
1361 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1362 : &X86::VR256RegClass);
1363
1364 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1365 setOperationAction(ISD::FFLOOR, VT, Legal);
1366 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1367 setOperationAction(ISD::FCEIL, VT, Legal);
1368 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1369 setOperationAction(ISD::FTRUNC, VT, Legal);
1370 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1371 setOperationAction(ISD::FRINT, VT, Legal);
1372 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1373 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1374 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1375 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1376 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1377
1378 setOperationAction(ISD::FROUND, VT, Custom);
1379
1380 setOperationAction(ISD::FNEG, VT, Custom);
1381 setOperationAction(ISD::FABS, VT, Custom);
1382 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1383 }
1384
1385 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1386 // even though v8i16 is a legal type.
1387 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1388 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1389 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1390 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1391 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
1392 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
1393 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
1394
1395 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
1396 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
1397 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
1398 setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
1399 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
1400 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
1401
1402 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1403 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1404 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1405 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1406 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1407 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1408 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1409 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1410 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1411 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1412 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1413
1414 if (!Subtarget.hasAVX512())
1415 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1416
1417 // In the customized shift lowering, the v8i32/v4i64 cases that are legal
1418 // in AVX2 will be recognized.
1419 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1420 setOperationAction(ISD::SRL, VT, Custom);
1421 setOperationAction(ISD::SHL, VT, Custom);
1422 setOperationAction(ISD::SRA, VT, Custom);
1423 setOperationAction(ISD::ABDS, VT, Custom);
1424 setOperationAction(ISD::ABDU, VT, Custom);
1425 if (VT == MVT::v4i64) continue;
1426 setOperationAction(ISD::ROTL, VT, Custom);
1427 setOperationAction(ISD::ROTR, VT, Custom);
1428 setOperationAction(ISD::FSHL, VT, Custom);
1429 setOperationAction(ISD::FSHR, VT, Custom);
1430 }
1431
1432 // These types need custom splitting if their input is a 128-bit vector.
1433 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1434 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1435 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1436 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1437
1438 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1439 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1440 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1441 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1442 setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
1443 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1444 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1445
1446 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1447 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1448 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1449 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1450 }
1451
1452 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1453 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1454 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1455 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1456
1457 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1458 setOperationAction(ISD::SETCC, VT, Custom);
1459 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1460 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1461 setOperationAction(ISD::CTPOP, VT, Custom);
1462 setOperationAction(ISD::CTLZ, VT, Custom);
1463
1464 // The condition codes aren't legal in SSE/AVX, and under AVX512 we use
1465 // setcc all the way to isel and prefer SETGT in some isel patterns.
1466 setCondCodeAction(ISD::SETLT, VT, Custom);
1467 setCondCodeAction(ISD::SETLE, VT, Custom);
1468 }
1469
1470 if (Subtarget.hasAnyFMA()) {
1471 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1472 MVT::v2f64, MVT::v4f64 }) {
1473 setOperationAction(ISD::FMA, VT, Legal);
1474 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1475 }
1476 }
1477
1478 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1479 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1480 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1481 }
1482
1483 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1484 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1485 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1486 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1487
1488 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1489 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1490 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1491 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1492 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1493 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1494 setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
1495 setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
1496
1497 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1498 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1499
1500 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1501 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1502 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1503 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1504 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1505
1506 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1507 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1508 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1509 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1510 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1511 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1512 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1513 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1514 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1515 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1516 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1517 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1518
1519 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1520 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1521 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1522 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1523 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1524 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1525 }
1526
1527 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1528 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1529 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1530 }
1531
1532 if (HasInt256) {
1533 // The custom lowering of UINT_TO_FP for v8i32 becomes interesting
1534 // when we have a 256-bit-wide blend with immediate.
1535 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1536 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1537
1538 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1539 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1540 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1541 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1542 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1543 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1544 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1545 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1546 }
1547 }
1548
1549 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1550 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1551 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1552 setOperationAction(ISD::MSTORE, VT, Legal);
1553 }
1554
1555 // Extract subvector is special because the value type
1556 // (result) is 128-bit but the source is 256-bit wide.
1557 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1558 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1559 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1560 }
1561
1562 // Custom lower several nodes for 256-bit types.
1563 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1564 MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
1565 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1566 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1567 setOperationAction(ISD::VSELECT, VT, Custom);
1568 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1569 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1570 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1571 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1572 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1573 setOperationAction(ISD::STORE, VT, Custom);
1574 }
1575 setF16Action(MVT::v16f16, Expand);
1576 setOperationAction(ISD::FADD, MVT::v16f16, Expand);
1577 setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
1578 setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
1579 setOperationAction(ISD::FDIV, MVT::v16f16, Expand);
1580
1581 if (HasInt256) {
1582 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1583
1584 // Custom legalize 2x32 to get a little better code.
1585 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1586 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1587
1588 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1589 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1590 setOperationAction(ISD::MGATHER, VT, Custom);
1591 }
1592 }
1593
1594 if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
1595 Subtarget.hasF16C()) {
1596 for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
1597 setOperationAction(ISD::FP_ROUND, VT, Custom);
1598 setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1599 }
1600 for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) {
1601 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1602 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
1603 }
1604 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1605 setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
1606 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1607 }
1608
1609 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1610 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
1611 }
1612
1613 // This block controls legalization of the mask vector sizes that are
1614 // available with AVX512. 512-bit vectors are in a separate block controlled
1615 // by useAVX512Regs.
1616 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1617 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1618 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1619 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1620 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1621 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1622
1623 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1624 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1625 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1626
1627 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1628 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1629 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1630 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1631 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1632 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1633 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1634 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1635 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1636 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1637 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1638 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1639
1640 // There is no byte sized k-register load or store without AVX512DQ.
1641 if (!Subtarget.hasDQI()) {
1642 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1643 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1644 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1645 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1646
1647 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1648 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1649 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1650 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1651 }
1652
1653 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1654 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1655 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1656 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1657 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1658 }
1659
1660 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1661 setOperationAction(ISD::VSELECT, VT, Expand);
1662
1663 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1664 setOperationAction(ISD::SETCC, VT, Custom);
1665 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1666 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1667 setOperationAction(ISD::SELECT, VT, Custom);
1668 setOperationAction(ISD::TRUNCATE, VT, Custom);
1669
1670 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1671 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1672 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1673 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1674 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1675 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1676 }
1677
1678 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1679 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1680 }
1681
1682 // This block controls legalization for 512-bit operations with 32/64-bit
1683 // elements. 512-bit operations can be disabled based on the
1684 // prefer-vector-width and required-vector-width function attributes.
1685 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1686 bool HasBWI = Subtarget.hasBWI();
1687
1688 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1689 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1690 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1691 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1692 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1693 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1694 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1695
1696 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1697 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1698 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1699 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1700 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1701 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1702 if (HasBWI)
1703 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1704 }
1705
1706 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1707 setOperationAction(ISD::FNEG, VT, Custom);
1708 setOperationAction(ISD::FABS, VT, Custom);
1709 setOperationAction(ISD::FMA, VT, Legal);
1710 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1711 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1712 }
1713
1714 for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
1715 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1716 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1717 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1718 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1719 }
1720
1721 for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
1722 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1723 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1724 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1725 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1726 }
1727
1728 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
1729 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
1730 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
1731 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom);
1732 setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
1733 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
1734
1735 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1736 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1737 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1738 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1739 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1740 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1741 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1742 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1743 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1744 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1745 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1746
1747 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1748 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1749 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1750 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1751 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1752 if (HasBWI)
1753 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1754
1755 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1756 // to 512-bit rather than use the AVX2 instructions so that we can use
1757 // k-masks.
1758 if (!Subtarget.hasVLX()) {
1759 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1760 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1761 setOperationAction(ISD::MLOAD, VT, Custom);
1762 setOperationAction(ISD::MSTORE, VT, Custom);
1763 }
1764 }
1765
1766 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1767 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1768 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1769 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1770 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1771 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1772 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1773 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1774 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1775 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1776 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1777 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1778 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1779
1780 if (HasBWI) {
1781 // Extends from v64i1 masks to 512-bit vectors.
1782 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1783 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1784 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1785 }
1786
1787 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1788 setOperationAction(ISD::FFLOOR, VT, Legal);
1789 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1790 setOperationAction(ISD::FCEIL, VT, Legal);
1791 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1792 setOperationAction(ISD::FTRUNC, VT, Legal);
1793 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1794 setOperationAction(ISD::FRINT, VT, Legal);
1795 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1796 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1797 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1798 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1799 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1800
1801 setOperationAction(ISD::FROUND, VT, Custom);
1802 }
1803
1804 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1805 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1806 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1807 }
1808
1809 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1810 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1811 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1812 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1813
1814 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1815 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1816 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1817 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1818
1819 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1820 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1821 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1822 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1823 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1824 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1825 setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
1826 setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
1827
1828 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1829 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1830
1831 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1832
1833 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1834 setOperationAction(ISD::SRL, VT, Custom);
1835 setOperationAction(ISD::SHL, VT, Custom);
1836 setOperationAction(ISD::SRA, VT, Custom);
1837 setOperationAction(ISD::ROTL, VT, Custom);
1838 setOperationAction(ISD::ROTR, VT, Custom);
1839 setOperationAction(ISD::SETCC, VT, Custom);
1840 setOperationAction(ISD::ABDS, VT, Custom);
1841 setOperationAction(ISD::ABDU, VT, Custom);
1842
1843 // The condition codes aren't legal in SSE/AVX, and under AVX512 we use
1844 // setcc all the way to isel and prefer SETGT in some isel patterns.
1845 setCondCodeAction(ISD::SETLT, VT, Custom);
1846 setCondCodeAction(ISD::SETLE, VT, Custom);
1847 }
1848 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1849 setOperationAction(ISD::SMAX, VT, Legal);
1850 setOperationAction(ISD::UMAX, VT, Legal);
1851 setOperationAction(ISD::SMIN, VT, Legal);
1852 setOperationAction(ISD::UMIN, VT, Legal);
1853 setOperationAction(ISD::ABS, VT, Legal);
1854 setOperationAction(ISD::CTPOP, VT, Custom);
1855 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1856 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1857 }
1858
1859 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1860 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1861 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1862 setOperationAction(ISD::CTLZ, VT, Custom);
1863 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1864 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1865 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1866 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1867 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1868 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1869 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1870 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1871 }
1872
1873 setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
1874 setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
1875 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1876 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1877 setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
1878 setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
1879
1880 if (Subtarget.hasDQI()) {
1881 for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
1882 ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1883 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
1884 setOperationAction(Opc, MVT::v8i64, Custom);
1885 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1886 }
1887
1888 if (Subtarget.hasCDI()) {
1889 // Non-VLX subtargets extend 128/256-bit vectors to use the 512-bit version.
1890 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1891 setOperationAction(ISD::CTLZ, VT, Legal);
1892 }
1893 } // Subtarget.hasCDI()
1894
1895 if (Subtarget.hasVPOPCNTDQ()) {
1896 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1897 setOperationAction(ISD::CTPOP, VT, Legal);
1898 }
1899
1900 // Extract subvector is special because the value type
1901 // (result) is 256-bit but the source is 512-bit wide.
1902 // 128-bit was made Legal under AVX1.
1903 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1904 MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1905 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1906
1907 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1908 MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
1909 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1910 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1911 setOperationAction(ISD::SELECT, VT, Custom);
1912 setOperationAction(ISD::VSELECT, VT, Custom);
1913 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1914 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1915 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1916 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1917 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1918 }
1919 setF16Action(MVT::v32f16, Expand);
1920 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom);
1921 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom);
1922 setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
1923 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
1924 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1925 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1926 setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
1927 }
1928
1929 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1930 setOperationAction(ISD::MLOAD, VT, Legal);
1931 setOperationAction(ISD::MSTORE, VT, Legal);
1932 setOperationAction(ISD::MGATHER, VT, Custom);
1933 setOperationAction(ISD::MSCATTER, VT, Custom);
1934 }
1935 if (HasBWI) {
1936 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1937 setOperationAction(ISD::MLOAD, VT, Legal);
1938 setOperationAction(ISD::MSTORE, VT, Legal);
1939 }
1940 } else {
1941 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1942 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1943 }
1944
1945 if (Subtarget.hasVBMI2()) {
1946 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1947 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1948 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1949 setOperationAction(ISD::FSHL, VT, Custom);
1950 setOperationAction(ISD::FSHR, VT, Custom);
1951 }
1952
1953 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1954 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1955 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1956 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1957 }
1958 }// useAVX512Regs
1959
1960 // This block controls legalization for operations that don't have
1961 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1962 // narrower widths.
1963 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1964 // These operations are handled on non-VLX by artificially widening in
1965 // isel patterns.
1966
1967 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
1968 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
1969 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1970
1971 if (Subtarget.hasDQI()) {
1972 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1973 // v2f32 UINT_TO_FP is already custom under SSE2.
1974 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
1975 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
1976 "Unexpected operation action!");
1977 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1978 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1979 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1980 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1981 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1982 }
1983
1984 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1985 setOperationAction(ISD::SMAX, VT, Legal);
1986 setOperationAction(ISD::UMAX, VT, Legal);
1987 setOperationAction(ISD::SMIN, VT, Legal);
1988 setOperationAction(ISD::UMIN, VT, Legal);
1989 setOperationAction(ISD::ABS, VT, Legal);
1990 }
1991
1992 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1993 setOperationAction(ISD::ROTL, VT, Custom);
1994 setOperationAction(ISD::ROTR, VT, Custom);
1995 }
1996
1997 // Custom legalize 2x32 to get a little better code.
1998 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1999 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
2000
2001 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
2002 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
2003 setOperationAction(ISD::MSCATTER, VT, Custom);
2004
2005 if (Subtarget.hasDQI()) {
2006 for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
2007 ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
2008 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) {
2009 setOperationAction(Opc, MVT::v2i64, Custom);
2010 setOperationAction(Opc, MVT::v4i64, Custom);
2011 }
2012 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
2013 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
2014 }
2015
2016 if (Subtarget.hasCDI()) {
2017 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
2018 setOperationAction(ISD::CTLZ, VT, Legal);
2019 }
2020 } // Subtarget.hasCDI()
2021
2022 if (Subtarget.hasVPOPCNTDQ()) {
2023 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
2024 setOperationAction(ISD::CTPOP, VT, Legal);
2025 }
2026 }
2027
2028 // This block controls legalization of v32i1/v64i1, which are available with
2029 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
2030 // useBWIRegs.
2031 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
2032 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
2033 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
2034
2035 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
2036 setOperationAction(ISD::VSELECT, VT, Expand);
2037 setOperationAction(ISD::TRUNCATE, VT, Custom);
2038 setOperationAction(ISD::SETCC, VT, Custom);
2039 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
2040 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
2041 setOperationAction(ISD::SELECT, VT, Custom);
2042 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2043 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2044 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
2045 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
2046 }
2047
2048 for (auto VT : { MVT::v16i1, MVT::v32i1 })
2049 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
2050
2051 // Extends from v32i1 masks to 256-bit vectors.
2052 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
2053 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
2054 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
2055
2056 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
2057 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
2058 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
2059 }
2060
2061 // These operations are handled on non-VLX by artificially widening in
2062 // isel patterns.
2063 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
2064
2065 if (Subtarget.hasBITALG()) {
2066 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
2067 setOperationAction(ISD::CTPOP, VT, Legal);
2068 }
2069 }
2070
2071 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
2072 auto setGroup = [&] (MVT VT) {
2073 setOperationAction(ISD::FADD, VT, Legal);
2074 setOperationAction(ISD::STRICT_FADD, VT, Legal);
2075 setOperationAction(ISD::FSUB, VT, Legal);
2076 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
2077 setOperationAction(ISD::FMUL, VT, Legal);
2078 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
2079 setOperationAction(ISD::FDIV, VT, Legal);
2080 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
2081 setOperationAction(ISD::FSQRT, VT, Legal);
2082 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
2083
2084 setOperationAction(ISD::FFLOOR, VT, Legal);
2085 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
2086 setOperationAction(ISD::FCEIL, VT, Legal);
2087 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
2088 setOperationAction(ISD::FTRUNC, VT, Legal);
2089 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
2090 setOperationAction(ISD::FRINT, VT, Legal);
2091 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
2092 setOperationAction(ISD::FNEARBYINT, VT, Legal);
2093 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
2094
2095 setOperationAction(ISD::FROUND, VT, Custom);
2096
2097 setOperationAction(ISD::LOAD, VT, Legal);
2098 setOperationAction(ISD::STORE, VT, Legal);
2099
2100 setOperationAction(ISD::FMA, VT, Legal);
2101 setOperationAction(ISD::STRICT_FMA, VT, Legal);
2102 setOperationAction(ISD::VSELECT, VT, Legal);
2103 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2104 setOperationAction(ISD::SELECT, VT, Custom);
2105
2106 setOperationAction(ISD::FNEG, VT, Custom);
2107 setOperationAction(ISD::FABS, VT, Custom);
2108 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
2109 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
2110 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2111 };
2112
2113 // AVX512_FP16 scalar operations
2114 setGroup(MVT::f16);
2115 setOperationAction(ISD::FREM, MVT::f16, Promote);
2116 setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
2117 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
2118 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
2119 setOperationAction(ISD::SETCC, MVT::f16, Custom);
2120 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
2121 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
2122 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
2123 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
2124 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
2125 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
2126 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
2127 setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
2128 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
2129
2130 setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
2131 setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
2132
2133 if (Subtarget.useAVX512Regs()) {
2134 setGroup(MVT::v32f16);
2135 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
2136 setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
2137 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
2138 setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
2139 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
2140 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
2141 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
2142 setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
2143 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
2144 setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal);
2145 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
2146 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
2147
2148 setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
2149 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
2150 setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
2151 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
2152 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
2153 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
2154 MVT::v32i16);
2155 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
2156 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
2157 MVT::v32i16);
2158 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
2159 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
2160 MVT::v32i16);
2161 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
2162 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
2163 MVT::v32i16);
2164
2165 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
2166 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
2167 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
2168
2169 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
2170 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2171
2172 setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
2173 setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
2174 }
2175
2176 if (Subtarget.hasVLX()) {
2177 setGroup(MVT::v8f16);
2178 setGroup(MVT::v16f16);
2179
2180 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
2181 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
2182 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
2183 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
2184 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
2185 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
2186 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
2187 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
2188 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
2189 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
2190
2191 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
2192 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
2193 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
2194 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
2195 setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
2196 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
2197 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
2198 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
2199 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
2200 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
2201
2202 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2203 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
2204 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
2205
2206 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
2207 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
2208 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
2209
2210 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2211 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2212 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2213 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2214
2215 // Need to custom widen these to prevent scalarization.
2216 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2217 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2218 }
2219 }
2220
2221 if (!Subtarget.useSoftFloat() &&
2222 (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
2223 addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
2224 addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
2225 // We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
2226 // provide a method to promote BUILD_VECTOR, so set its operation action
2227 // to Custom and handle the customization later.
2228 setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
2229 for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2230 setF16Action(VT, Expand);
2231 setOperationAction(ISD::FADD, VT, Expand);
2232 setOperationAction(ISD::FSUB, VT, Expand);
2233 setOperationAction(ISD::FMUL, VT, Expand);
2234 setOperationAction(ISD::FDIV, VT, Expand);
2235 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2236 }
2237 addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
2238 }
2239
2240 if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
2241 addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
2242 setF16Action(MVT::v32bf16, Expand);
2243 setOperationAction(ISD::FADD, MVT::v32bf16, Expand);
2244 setOperationAction(ISD::FSUB, MVT::v32bf16, Expand);
2245 setOperationAction(ISD::FMUL, MVT::v32bf16, Expand);
2246 setOperationAction(ISD::FDIV, MVT::v32bf16, Expand);
2247 setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom);
2248 }
2249
2250 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2251 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2252 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2253 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2254 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2255 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2256
2257 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2258 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2259 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2260 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2261 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2262
2263 if (Subtarget.hasBWI()) {
2264 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2265 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2266 }
2267
2268 if (Subtarget.hasFP16()) {
2269 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2270 setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
2271 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
2272 setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
2273 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
2274 setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
2275 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
2276 setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
2277 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
2278 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2279 setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
2280 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
2281 setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
2282 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
2283 setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
2284 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
2285 setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
2286 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
2287 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2288 setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
2289 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
2290 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
2291 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
2292 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2293 setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
2294 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
2295 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
2296 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
2297 }
2298
2299 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
2300 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
2301 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
2302 }
2303
2304 if (Subtarget.hasAMXTILE()) {
2305 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2306 }
2307
2308 // We want to custom lower some of our intrinsics.
2309 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
2310 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
2311 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
2312 if (!Subtarget.is64Bit()) {
2313 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
2314 }
2315
2316 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2317 // handle type legalization for these operations here.
2318 //
2319 // FIXME: We really should do custom legalization for addition and
2320 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2321 // than generic legalization for 64-bit multiplication-with-overflow, though.
2322 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2323 if (VT == MVT::i64 && !Subtarget.is64Bit())
2324 continue;
2325 // Add/Sub/Mul with overflow operations are custom lowered.
2326 setOperationAction(ISD::SADDO, VT, Custom);
2327 setOperationAction(ISD::UADDO, VT, Custom);
2328 setOperationAction(ISD::SSUBO, VT, Custom);
2329 setOperationAction(ISD::USUBO, VT, Custom);
2330 setOperationAction(ISD::SMULO, VT, Custom);
2331 setOperationAction(ISD::UMULO, VT, Custom);
2332
2333 // Support carry in as value rather than glue.
2334 setOperationAction(ISD::ADDCARRY, VT, Custom);
2335 setOperationAction(ISD::SUBCARRY, VT, Custom);
2336 setOperationAction(ISD::SETCCCARRY, VT, Custom);
2337 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
2338 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
2339 }
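// A rough sketch (not part of the original source, assuming clang's usual
// lowering of __builtin_add_overflow): source-level checked arithmetic like
// the snippet below is what typically reaches the ISD::SADDO/UADDO nodes
// configured above.
//
//   bool add_overflows(int a, int b, int *out) {
//     // emitted as @llvm.sadd.with.overflow.i32, i.e. ISD::SADDO here
//     return __builtin_add_overflow(a, b, out);
//   }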
2340
2341 if (!Subtarget.is64Bit()) {
2342 // These libcalls are not available in 32-bit.
2343 setLibcallName(RTLIB::SHL_I128, nullptr);
2344 setLibcallName(RTLIB::SRL_I128, nullptr);
2345 setLibcallName(RTLIB::SRA_I128, nullptr);
2346 setLibcallName(RTLIB::MUL_I128, nullptr);
2347 // The MULO libcall is not part of libgcc, only compiler-rt.
2348 setLibcallName(RTLIB::MULO_I64, nullptr);
2349 }
2350 // The MULO libcall is not part of libgcc, only compiler-rt.
2351 setLibcallName(RTLIB::MULO_I128, nullptr);
2352
2353 // Combine sin / cos into _sincos_stret if it is available.
2354 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2355 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2356 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
2357 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
2358 }
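// A rough sketch (not part of the original source): with FSINCOS marked
// Custom above, sin and cos of the same operand can be merged by the
// legalizer and emitted as a single sincos-style call such as _sincos_stret
// on targets whose runtime provides it.
//
//   void polar(float a, float *s, float *c) {
//     *s = sinf(a); // sinf + cosf on the same value ...
//     *c = cosf(a); // ... may fold into one combined runtime call
//   }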
2359
2360 if (Subtarget.isTargetWin64()) {
2361 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2362 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2363 setOperationAction(ISD::SREM, MVT::i128, Custom);
2364 setOperationAction(ISD::UREM, MVT::i128, Custom);
2365 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
2366 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
2367 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
2368 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
2369 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
2370 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
2371 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
2372 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
2373 }
2374
2375 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2376 // is. We should promote the value to 64-bits to solve this.
2377 // This is what the CRT headers do - `fmodf` is an inline header
2378 // function casting to f64 and calling `fmod`.
2379 if (Subtarget.is32Bit() &&
2380 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2381 for (ISD::NodeType Op :
2382 {ISD::FCEIL, ISD::STRICT_FCEIL,
2383 ISD::FCOS, ISD::STRICT_FCOS,
2384 ISD::FEXP, ISD::STRICT_FEXP,
2385 ISD::FFLOOR, ISD::STRICT_FFLOOR,
2386 ISD::FREM, ISD::STRICT_FREM,
2387 ISD::FLOG, ISD::STRICT_FLOG,
2388 ISD::FLOG10, ISD::STRICT_FLOG10,
2389 ISD::FPOW, ISD::STRICT_FPOW,
2390 ISD::FSIN, ISD::STRICT_FSIN})
2391 if (isOperationExpand(Op, MVT::f32))
2392 setOperationAction(Op, MVT::f32, Promote);
2393
2394 // We have target-specific dag combine patterns for the following nodes:
2395 setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
2396 ISD::SCALAR_TO_VECTOR,
2397 ISD::INSERT_VECTOR_ELT,
2398 ISD::EXTRACT_VECTOR_ELT,
2399 ISD::CONCAT_VECTORS,
2400 ISD::INSERT_SUBVECTOR,
2401 ISD::EXTRACT_SUBVECTOR,
2402 ISD::BITCAST,
2403 ISD::VSELECT,
2404 ISD::SELECT,
2405 ISD::SHL,
2406 ISD::SRA,
2407 ISD::SRL,
2408 ISD::OR,
2409 ISD::AND,
2410 ISD::ADD,
2411 ISD::FADD,
2412 ISD::FSUB,
2413 ISD::FNEG,
2414 ISD::FMA,
2415 ISD::STRICT_FMA,
2416 ISD::FMINNUM,
2417 ISD::FMAXNUM,
2418 ISD::SUB,
2419 ISD::LOAD,
2420 ISD::MLOAD,
2421 ISD::STORE,
2422 ISD::MSTORE,
2423 ISD::TRUNCATE,
2424 ISD::ZERO_EXTEND,
2425 ISD::ANY_EXTEND,
2426 ISD::SIGN_EXTEND,
2427 ISD::SIGN_EXTEND_INREG,
2428 ISD::ANY_EXTEND_VECTOR_INREG,
2429 ISD::SIGN_EXTEND_VECTOR_INREG,
2430 ISD::ZERO_EXTEND_VECTOR_INREG,
2431 ISD::SINT_TO_FP,
2432 ISD::UINT_TO_FP,
2433 ISD::STRICT_SINT_TO_FP,
2434 ISD::STRICT_UINT_TO_FP,
2435 ISD::SETCC,
2436 ISD::MUL,
2437 ISD::XOR,
2438 ISD::MSCATTER,
2439 ISD::MGATHER,
2440 ISD::FP16_TO_FP,
2441 ISD::FP_EXTEND,
2442 ISD::STRICT_FP_EXTEND,
2443 ISD::FP_ROUND,
2444 ISD::STRICT_FP_ROUND});
2445
2446 computeRegisterProperties(Subtarget.getRegisterInfo());
2447
2448 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2449 MaxStoresPerMemsetOptSize = 8;
2450 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2451 MaxStoresPerMemcpyOptSize = 4;
2452 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2453 MaxStoresPerMemmoveOptSize = 4;
2454
2455 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2456 // that needs to be benchmarked and balanced with the potential use of vector
2457 // load/store types (PR33329, PR33914).
2458 MaxLoadsPerMemcmp = 2;
2459 MaxLoadsPerMemcmpOptSize = 2;
2460
2461 // Default loop alignment, which can be overridden by -align-loops.
2462 setPrefLoopAlignment(Align(16));
2463
2464 // An out-of-order CPU can speculatively execute past a predictable branch,
2465 // but a conditional move could be stalled by an expensive earlier operation.
2466 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2467 EnableExtLdPromotion = true;
2468 setPrefFunctionAlignment(Align(16));
2469
2470 verifyIntrinsicTables();
2471
2472 // Default to having -disable-strictnode-mutation on
2473 IsStrictFPEnabled = true;
2474}
2475
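The overflow and carry setup above models a (sum, carry-out) = a + b + carry-in primitive (ISD::ADDCARRY and friends), with the carry passed as an ordinary value rather than glue. As a standalone sketch of those semantics only — plain C++, not DAG code — a 128-bit addition built from two chained carry steps could look like this:

#include <cstdint>
#include <utility>

// One carry step: returns {sum, carry-out} for A + B + CarryIn (CarryIn is 0 or 1).
static std::pair<uint64_t, uint64_t> addCarry(uint64_t A, uint64_t B,
                                              uint64_t CarryIn) {
  uint64_t Sum = A + B;
  uint64_t Carry = Sum < A;       // carry out of A + B
  Sum += CarryIn;
  Carry |= Sum < CarryIn;         // carry out of adding the carry-in
  return {Sum, Carry};
}

// 128-bit addition expressed as two chained carry steps.
static void add128(const uint64_t A[2], const uint64_t B[2], uint64_t Out[2]) {
  auto [Lo, CarryLo] = addCarry(A[0], B[0], 0);
  auto [Hi, CarryHi] = addCarry(A[1], B[1], CarryLo);
  (void)CarryHi;                  // final carry-out is dropped here
  Out[0] = Lo;
  Out[1] = Hi;
}

Roughly speaking, keeping the carry as a value instead of glue leaves the DAG free to schedule and combine the flag like any other operand.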
2476// This has so far only been implemented for 64-bit MachO.
2477bool X86TargetLowering::useLoadStackGuardNode() const {
2478 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2479}
2480
2481bool X86TargetLowering::useStackGuardXorFP() const {
2482 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2483 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2484}
2485
2486SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2487 const SDLoc &DL) const {
2488 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2489 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2490 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2491 return SDValue(Node, 0);
2492}
2493
2494TargetLoweringBase::LegalizeTypeAction
2495X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2496 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2497 !Subtarget.hasBWI())
2498 return TypeSplitVector;
2499
2500 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2501 !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)
2502 return TypeSplitVector;
2503
2504 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2505 VT.getVectorElementType() != MVT::i1)
2506 return TypeWidenVector;
2507
2508 return TargetLoweringBase::getPreferredVectorAction(VT);
2509}
2510
2511static std::pair<MVT, unsigned>
2512handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2513 const X86Subtarget &Subtarget) {
2514 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2515 // convention is one that uses k registers.
2516 if (NumElts == 2)
2517 return {MVT::v2i64, 1};
2518 if (NumElts == 4)
2519 return {MVT::v4i32, 1};
2520 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2521 CC != CallingConv::Intel_OCL_BI)
2522 return {MVT::v8i16, 1};
2523 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2524 CC != CallingConv::Intel_OCL_BI)
2525 return {MVT::v16i8, 1};
2526 // v32i1 passes in ymm unless we have BWI and the calling convention is
2527 // regcall.
2528 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2529 return {MVT::v32i8, 1};
2530 // Split v64i1 vectors if we don't have v64i8 available.
2531 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2532 if (Subtarget.useAVX512Regs())
2533 return {MVT::v64i8, 1};
2534 return {MVT::v32i8, 2};
2535 }
2536
2537 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2538 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2539 NumElts > 64)
2540 return {MVT::i8, NumElts};
2541
2542 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2543}
2544
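For quick reference, the mapping encoded above can be condensed for the plain C calling convention into a (register-width, register-count) table. This is an illustrative restatement only; RegCall / Intel_OCL_BI and the v1i1 fallback to generic handling are intentionally not modeled.

#include <utility>

// Mask element count -> {bits of the register type used, number of registers},
// for the plain C calling convention.
static std::pair<unsigned, unsigned>
maskPassingForC(unsigned NumElts, bool HasBWI, bool UseAVX512Regs) {
  switch (NumElts) {
  case 2:
  case 4:
  case 8:
  case 16:
    return {128, 1};              // v2i64 / v4i32 / v8i16 / v16i8 in one xmm
  case 32:
    return {256, 1};              // v32i8 in one ymm
  case 64:
    if (!HasBWI)
      return {8, 64};             // broken into 64 i8 scalars
    if (UseAVX512Regs)
      return {512, 1};            // one v64i8 in a zmm
    return {256, 2};              // two v32i8 halves
  default:
    return {8, NumElts};          // wide or odd masks become i8 scalars
  }
}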
2545MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2546 CallingConv::ID CC,
2547 EVT VT) const {
2548 if (VT.isVector()) {
2549 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
2550 unsigned NumElts = VT.getVectorNumElements();
2551
2552 MVT RegisterVT;
2553 unsigned NumRegisters;
2554 std::tie(RegisterVT, NumRegisters) =
2555 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2556 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2557 return RegisterVT;
2558 }
2559
2560 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
2561 return MVT::v8f16;
2562 }
2563
2564 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
2565 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
2566 !Subtarget.hasX87())
2567 return MVT::i32;
2568
2569 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
2570 return getRegisterTypeForCallingConv(Context, CC,
2571 VT.changeVectorElementTypeToInteger());
2572
2573 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2574}
2575
2576unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2577 CallingConv::ID CC,
2578 EVT VT) const {
2579 if (VT.isVector()) {
2580 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
2581 unsigned NumElts = VT.getVectorNumElements();
2582
2583 MVT RegisterVT;
2584 unsigned NumRegisters;
2585 std::tie(RegisterVT, NumRegisters) =
2586 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2587 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2588 return NumRegisters;
2589 }
2590
2591 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
2592 return 1;
2593 }
2594
2595 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
2596 // x87 is disabled.
2597 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
2598 if (VT == MVT::f64)
2599 return 2;
2600 if (VT == MVT::f80)
2601 return 3;
2602 }
2603
2604 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
2605 return getNumRegistersForCallingConv(Context, CC,
2606 VT.changeVectorElementTypeToInteger());
2607
2608 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2609}
2610
2611unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2612 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2613 unsigned &NumIntermediates, MVT &RegisterVT) const {
2614 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2615 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2616 Subtarget.hasAVX512() &&
2617 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2618 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2619 VT.getVectorNumElements() > 64)) {
2620 RegisterVT = MVT::i8;
2621 IntermediateVT = MVT::i1;
2622 NumIntermediates = VT.getVectorNumElements();
2623 return NumIntermediates;
2624 }
2625
2626 // Split v64i1 vectors if we don't have v64i8 available.
2627 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2628 CC != CallingConv::X86_RegCall) {
2629 RegisterVT = MVT::v32i8;
2630 IntermediateVT = MVT::v32i1;
2631 NumIntermediates = 2;
2632 return 2;
2633 }
2634
2635 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2636 NumIntermediates, RegisterVT);
2637}
2638
2639EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2640 LLVMContext& Context,
2641 EVT VT) const {
2642 if (!VT.isVector())
2643 return MVT::i8;
2644
2645 if (Subtarget.hasAVX512()) {
2646 // Figure out what this type will be legalized to.
2647 EVT LegalVT = VT;
2648 while (getTypeAction(Context, LegalVT) != TypeLegal)
2649 LegalVT = getTypeToTransformTo(Context, LegalVT);
2650
2651 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2652 if (LegalVT.getSimpleVT().is512BitVector())
2653 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2654
2655 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2656 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2657 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2658 // vXi16/vXi8.
2659 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2660 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2661 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2662 }
2663 }
2664
2665 return VT.changeVectorElementTypeToInteger();
2666}
2667
2668/// Helper for getByValTypeAlignment to determine
2669/// the desired ByVal argument alignment.
2670static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2671 if (MaxAlign == 16)
2672 return;
2673 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2674 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
2675 MaxAlign = Align(16);
2676 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2677 Align EltAlign;
2678 getMaxByValAlign(ATy->getElementType(), EltAlign);
2679 if (EltAlign > MaxAlign)
2680 MaxAlign = EltAlign;
2681 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2682 for (auto *EltTy : STy->elements()) {
2683 Align EltAlign;
2684 getMaxByValAlign(EltTy, EltAlign);
2685 if (EltAlign > MaxAlign)
2686 MaxAlign = EltAlign;
2687 if (MaxAlign == 16)
2688 break;
2689 }
2690 }
2691}
2692
2693/// Return the desired alignment for ByVal aggregate
2694/// function arguments in the caller parameter area. For X86, aggregates
2695/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2696/// are at 4-byte boundaries.
2697uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
2698 const DataLayout &DL) const {
2699 if (Subtarget.is64Bit()) {
2700 // Max of 8 and alignment of type.
2701 Align TyAlign = DL.getABITypeAlign(Ty);
2702 if (TyAlign > 8)
2703 return TyAlign.value();
2704 return 8;
2705 }
2706
2707 Align Alignment(4);
2708 if (Subtarget.hasSSE1())
2709 getMaxByValAlign(Ty, Alignment);
2710 return Alignment.value();
2711}
2712
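As an illustrative, standalone restatement of the rule implemented above (same inputs in spirit, not the LLVM API):

#include <algorithm>
#include <cstdint>

// 64-bit targets use max(8, ABI alignment); 32-bit targets use 16 when SSE is
// available and the aggregate contains a 128-bit vector, and 4 otherwise.
static uint64_t byvalAlignSketch(bool Is64Bit, bool HasSSE1, uint64_t AbiAlign,
                                 bool Contains128BitVector) {
  if (Is64Bit)
    return std::max<uint64_t>(8, AbiAlign);
  return (HasSSE1 && Contains128BitVector) ? 16 : 4;
}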
2713/// It returns EVT::Other if the type should be determined using generic
2714/// target-independent logic.
2715/// For vector ops we check that the overall size isn't larger than our
2716/// preferred vector width.
2717EVT X86TargetLowering::getOptimalMemOpType(
2718 const MemOp &Op, const AttributeList &FuncAttributes) const {
2719 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
2720 if (Op.size() >= 16 &&
2721 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2722 // FIXME: Check if unaligned 64-byte accesses are slow.
2723 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2724 (Subtarget.getPreferVectorWidth() >= 512)) {
2725 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2726 }
2727 // FIXME: Check if unaligned 32-byte accesses are slow.
2728 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2729 Subtarget.useLight256BitInstructions()) {
2730 // Although this isn't a well-supported type for AVX1, we'll let
2731 // legalization and shuffle lowering produce the optimal codegen. If we
2732 // choose an optimal type with a vector element larger than a byte,
2733 // getMemsetStores() may create an intermediate splat (using an integer
2734 // multiply) before we splat as a vector.
2735 return MVT::v32i8;
2736 }
2737 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2738 return MVT::v16i8;
2739 // TODO: Can SSE1 handle a byte vector?
2740 // If we have SSE1 registers we should be able to use them.
2741 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2742 (Subtarget.getPreferVectorWidth() >= 128))
2743 return MVT::v4f32;
2744 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2745 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2746 // Do not use f64 to lower memcpy if source is string constant. It's
2747 // better to use i32 to avoid the loads.
2748 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2749 // The gymnastics of splatting a byte value into an XMM register and then
2750 // only using 8-byte stores (because this is a CPU with slow unaligned
2751 // 16-byte accesses) makes that a loser.
2752 return MVT::f64;
2753 }
2754 }
2755 // This is a compromise. If we reach here, unaligned accesses may be slow on
2756 // this target. However, creating smaller, aligned accesses could be even
2757 // slower and would certainly be a lot more code.
2758 if (Subtarget.is64Bit() && Op.size() >= 8)
2759 return MVT::i64;
2760 return MVT::i32;
2761}
2762
2763bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2764 if (VT == MVT::f32)
2765 return Subtarget.hasSSE1();
2766 if (VT == MVT::f64)
2767 return Subtarget.hasSSE2();
2768 return true;
2769}
2770
2771static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
2772 return (8 * Alignment.value()) % SizeInBits == 0;
2773}
2774
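A couple of spot checks of the predicate above, as a standalone sketch:

#include <cassert>
#include <cstdint>

// Mirror of the predicate above, used only for these checks.
static bool isBitAlignedSketch(uint64_t AlignBytes, uint64_t SizeInBits) {
  return (8 * AlignBytes) % SizeInBits == 0;
}

int main() {
  assert(isBitAlignedSketch(16, 128));   // 16-byte alignment covers an xmm access
  assert(!isBitAlignedSketch(16, 256));  // but not a full ymm access
  assert(isBitAlignedSketch(64, 512));   // 64-byte alignment covers a zmm access
  return 0;
}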
2775bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
2776 if (isBitAligned(Alignment, VT.getSizeInBits()))
2777 return true;
2778 switch (VT.getSizeInBits()) {
2779 default:
2780 // 8-byte and under are always assumed to be fast.
2781 return true;
2782 case 128:
2783 return !Subtarget.isUnalignedMem16Slow();
2784 case 256:
2785 return !Subtarget.isUnalignedMem32Slow();
2786 // TODO: What about AVX-512 (512-bit) accesses?
2787 }
2788}
2789
2790bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2791 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
2792 unsigned *Fast) const {
2793 if (Fast)
2794 *Fast = isMemoryAccessFast(VT, Alignment);
2795 // NonTemporal vector memory ops must be aligned.
2796 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2797 // NT loads can only be vector aligned, so if it's less aligned than the
2798 // minimum vector size (which we can split the vector down to), we might as
2799 // well use a regular unaligned vector load.
2800 // We don't have any NT loads pre-SSE41.
2801 if (!!(Flags & MachineMemOperand::MOLoad))
2802 return (Alignment < 16 || !Subtarget.hasSSE41());
2803 return false;
2804 }
2805 // Misaligned accesses of any size are always allowed.
2806 return true;
2807}
2808
2809bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
2810 const DataLayout &DL, EVT VT,
2811 unsigned AddrSpace, Align Alignment,
2812 MachineMemOperand::Flags Flags,
2813 unsigned *Fast) const {
2814 if (Fast)
2815 *Fast = isMemoryAccessFast(VT, Alignment);
2816 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2817 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
2818 /*Fast=*/nullptr))
2819 return true;
2820 // NonTemporal vector memory ops are special, and must be aligned.
2821 if (!isBitAligned(Alignment, VT.getSizeInBits()))
2822 return false;
2823 switch (VT.getSizeInBits()) {
2824 case 128:
2825 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
2826 return true;
2827 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
2828 return true;
2829 return false;
2830 case 256:
2831 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
2832 return true;
2833 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
2834 return true;
2835 return false;
2836 case 512:
2837 if (Subtarget.hasAVX512())
2838 return true;
2839 return false;
2840 default:
2841 return false; // Don't have NonTemporal vector memory ops of this size.
2842 }
2843 }
2844 return true;
2845}
2846
2847/// Return the entry encoding for a jump table in the
2848/// current function. The returned value is a member of the
2849/// MachineJumpTableInfo::JTEntryKind enum.
2850unsigned X86TargetLowering::getJumpTableEncoding() const {
2851 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2852 // symbol.
2853 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2854 return MachineJumpTableInfo::EK_Custom32;
2855
2856 // Otherwise, use the normal jump table encoding heuristics.
2857 return TargetLowering::getJumpTableEncoding();
2858}
2859
2860bool X86TargetLowering::splitValueIntoRegisterParts(
2861 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
2862 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
2863 bool IsABIRegCopy = CC.has_value();
2864 EVT ValueVT = Val.getValueType();
2865 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
2866 unsigned ValueBits = ValueVT.getSizeInBits();
2867 unsigned PartBits = PartVT.getSizeInBits();
2868 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
2869 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
2870 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
2871 Parts[0] = Val;
2872 return true;
2873 }
2874 return false;
2875}
2876
2877SDValue X86TargetLowering::joinRegisterPartsIntoValue(
2878 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
2879 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
2880 bool IsABIRegCopy = CC.has_value();
2881 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
2882 unsigned ValueBits = ValueVT.getSizeInBits();
2883 unsigned PartBits = PartVT.getSizeInBits();
2884 SDValue Val = Parts[0];
2885
2886 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
2887 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
2888 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
2889 return Val;
2890 }
2891 return SDValue();
2892}
2893
2894bool X86TargetLowering::useSoftFloat() const {
2895 return Subtarget.useSoftFloat();
2896}
2897
2898void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2899 ArgListTy &Args) const {
2900
2901 // Only relabel X86-32 for C / Stdcall CCs.
2902 if (Subtarget.is64Bit())
2903 return;
2904 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2905 return;
2906 unsigned ParamRegs = 0;
2907 if (auto *M = MF->getFunction().getParent())
2908 ParamRegs = M->getNumberRegisterParameters();
2909
2910 // Mark the first N integer arguments as being passed in registers (inreg).
2911 for (auto &Arg : Args) {
2912 Type *T = Arg.Ty;
2913 if (T->isIntOrPtrTy())
2914 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2915 unsigned numRegs = 1;
2916 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2917 numRegs = 2;
2918 if (ParamRegs < numRegs)
2919 return;
2920 ParamRegs -= numRegs;
2921 Arg.IsInReg = true;
2922 }
2923 }
2924}
2925
2926const MCExpr *
2927X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2928 const MachineBasicBlock *MBB,
2929 unsigned uid,MCContext &Ctx) const{
2929 assert(isPositionIndependent() &&
2930 Subtarget.isPICStyleGOT());
2931 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2932 // entries.
2933 return MCSymbolRefExpr::create(MBB->getSymbol(),
2934 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2935}
2936
2937/// Returns relocation base for the given PIC jumptable.
2938SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2939 SelectionDAG &DAG) const {
2940 if (!Subtarget.is64Bit())
2941 // This doesn't have SDLoc associated with it, but is not really the
2942 // same as a Register.
2943 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2944 getPointerTy(DAG.getDataLayout()));
2945 return Table;
2946}
2947
2948/// This returns the relocation base for the given PIC jumptable,
2949/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2950const MCExpr *X86TargetLowering::
2951getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2952 MCContext &Ctx) const {
2953 // X86-64 uses RIP relative addressing based on the jump table label.
2954 if (Subtarget.isPICStyleRIPRel())
2955 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2956
2957 // Otherwise, the reference is relative to the PIC base.
2958 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2959}
2960
2961std::pair<const TargetRegisterClass *, uint8_t>
2962X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2963 MVT VT) const {
2964 const TargetRegisterClass *RRC = nullptr;
2965 uint8_t Cost = 1;
2966 switch (VT.SimpleTy) {
2967 default:
2968 return TargetLowering::findRepresentativeClass(TRI, VT);
2969 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2970 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2971 break;
2972 case MVT::x86mmx:
2973 RRC = &X86::VR64RegClass;
2974 break;
2975 case MVT::f32: case MVT::f64:
2976 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2977 case MVT::v4f32: case MVT::v2f64:
2978 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2979 case MVT::v8f32: case MVT::v4f64:
2980 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2981 case MVT::v16f32: case MVT::v8f64:
2982 RRC = &X86::VR128XRegClass;
2983 break;
2984 }
2985 return std::make_pair(RRC, Cost);
2986}
2987
2988unsigned X86TargetLowering::getAddressSpace() const {
2989 if (Subtarget.is64Bit())
2990 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2991 return 256;
2992}
2993
2994static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2995 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2996 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2997}
2998
2999static Constant* SegmentOffset(IRBuilderBase &IRB,
3000 int Offset, unsigned AddressSpace) {
3001 return ConstantExpr::getIntToPtr(
3002 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
3003 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
3004}
3005
3006Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
3007 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
3008 // tcbhead_t; use it instead of the usual global variable (see
3009 // sysdeps/{i386,x86_64}/nptl/tls.h)
3010 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
3011 if (Subtarget.isTargetFuchsia()) {
3012 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
3013 return SegmentOffset(IRB, 0x10, getAddressSpace());
3014 } else {
3015 unsigned AddressSpace = getAddressSpace();
3016 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
3017 // Note that users may customize the base register and offset.
3018 int Offset = M->getStackProtectorGuardOffset();
3019 // If the -stack-protector-guard-offset value is not set, the guard is at
3020 // %fs:0x28, unless we're using a Kernel code model, in which case
3021 // it's %gs:0x28. On i386 it's %gs:0x14.
3022 if (Offset == INT_MAX)
3023 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
3024
3025 StringRef GuardReg = M->getStackProtectorGuardReg();
3026 if (GuardReg == "fs")
3027 AddressSpace = X86AS::FS;
3028 else if (GuardReg == "gs")
3029 AddressSpace = X86AS::GS;
3030
3031 // Use the guard symbol if the user specified one.
3032 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
3033 if (!GuardSymb.empty()) {
3034 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
3035 if (!GV) {
3036 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
3037 : Type::getInt32Ty(M->getContext());
3038 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
3039 nullptr, GuardSymb, nullptr,
3040 GlobalValue::NotThreadLocal, AddressSpace);
3041 }
3042 return GV;
3043 }
3044
3045 return SegmentOffset(IRB, Offset, AddressSpace);
3046 }
3047 }
3048 return TargetLowering::getIRStackGuard(IRB);
3049}
3050
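On x86-64 Linux/glibc with the defaults described in the comments above, the guard load this produces is simply a read of %fs:0x28 (i386 reads %gs:0x14). A GNU inline-asm sketch of that access, for illustration only and valid on x86-64 only:

#include <cstdint>

// Read the TLS stack-guard slot the code above targets by default.
static uint64_t readStackGuardSlot() {
  uint64_t Guard;
  asm volatile("movq %%fs:0x28, %0" : "=r"(Guard));
  return Guard;
}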
3051void X86TargetLowering::insertSSPDeclarations(Module &M) const {
3052 // MSVC CRT provides functionalities for stack protection.
3053 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
3054 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
3055 // MSVC CRT has a global variable holding security cookie.
3056 M.getOrInsertGlobal("__security_cookie",
3057 Type::getInt8PtrTy(M.getContext()));
3058
3059 // MSVC CRT has a function to validate security cookie.
3060 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
3061 "__security_check_cookie", Type::getVoidTy(M.getContext()),
3062 Type::getInt8PtrTy(M.getContext()));
3063 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
3064 F->setCallingConv(CallingConv::X86_FastCall);
3065 F->addParamAttr(0, Attribute::AttrKind::InReg);
3066 }
3067 return;
3068 }
3069
3070 StringRef GuardMode = M.getStackProtectorGuard();
3071
3072 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
3073 if ((GuardMode == "tls" || GuardMode.empty()) &&
3074 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
3075 return;
3076 TargetLowering::insertSSPDeclarations(M);
3077}
3078
3079Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
3080 // MSVC CRT has a global variable holding security cookie.
3081 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
3082 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
3083 return M.getGlobalVariable("__security_cookie");
3084 }
3085 return TargetLowering::getSDagStackGuard(M);
3086}
3087
3088Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
3089 // MSVC CRT has a function to validate security cookie.
3090 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
3091 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
3092 return M.getFunction("__security_check_cookie");
3093 }
3094 return TargetLowering::getSSPStackGuardCheck(M);
3095}
3096
3097Value *
3098X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
3099 if (Subtarget.getTargetTriple().isOSContiki())
3100 return getDefaultSafeStackPointerLocation(IRB, false);
3101
3102 // Android provides a fixed TLS slot for the SafeStack pointer. See the
3103 // definition of TLS_SLOT_SAFESTACK in
3104 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
3105 if (Subtarget.isTargetAndroid()) {
3106 // %fs:0x48 (%gs with the Kernel code model);
3107 // %gs:0x24 on i386.
3108 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
3109 return SegmentOffset(IRB, Offset, getAddressSpace());
3110 }
3111
3112 // Fuchsia is similar.
3113 if (Subtarget.isTargetFuchsia()) {
3114 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
3115 return SegmentOffset(IRB, 0x18, getAddressSpace());
3116 }
3117
3118 return TargetLowering::getSafeStackPointerLocation(IRB);
3119}
3120
3121//===----------------------------------------------------------------------===//
3122// Return Value Calling Convention Implementation
3123//===----------------------------------------------------------------------===//
3124
3125bool X86TargetLowering::CanLowerReturn(
3126 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3127 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3128 SmallVector<CCValAssign, 16> RVLocs;
3129 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3130 return CCInfo.CheckReturn(Outs, RetCC_X86);
3131}
3132
3133const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
3134 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
3135 return ScratchRegs;
3136}
3137
3138ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
3139 // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
3140 // tests at the moment, which is not what we expected.
3141 static const MCPhysReg RCRegs[] = {X86::MXCSR};
3142 return RCRegs;
3143}
3144
3145 /// Lowers mask values (v*i1) to the local register values.
3146 /// \returns the DAG node after lowering to the register type.
3147static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
3148 const SDLoc &Dl, SelectionDAG &DAG) {
3149 EVT ValVT = ValArg.getValueType();
3150
3151 if (ValVT == MVT::v1i1)
3152 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
3153 DAG.getIntPtrConstant(0, Dl));
3154
3155 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
3156 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
3157 // Two stage lowering might be required
3158 // bitcast: v8i1 -> i8 / v16i1 -> i16
3159 // anyextend: i8 -> i32 / i16 -> i32
3160 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
3161 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
3162 if (ValLoc == MVT::i32)
3163 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
3164 return ValToCopy;
3165 }
3166
3167 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
3168 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
3169 // One stage lowering is required
3170 // bitcast: v32i1 -> i32 / v64i1 -> i64
3171 return DAG.getBitcast(ValLoc, ValArg);
3172 }
3173
3174 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
3175}
3176
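A scalar analogue of the two-stage lowering described in the comments above, using plain integers instead of DAG nodes; the zero-extension here is one legal realization of the any-extend (the high bits are otherwise unspecified):

#include <cstdint>

// Pack a v8i1 mask into an i8 ("bitcast"), then widen that byte to the i32
// register slot ("any-extend").
static uint32_t maskToI32(const bool Bits[8]) {
  uint8_t Packed = 0;
  for (int I = 0; I < 8; ++I)
    Packed |= static_cast<uint8_t>(Bits[I]) << I;   // element I -> bit I
  return Packed;                                     // widened to 32 bits
}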
3177 /// Breaks a v64i1 value into two registers and adds the new nodes to the DAG.
3178static void Passv64i1ArgInRegs(
3179 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
3180 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
3181 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
3182 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
3183 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
3184 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
3185 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
3186 "The value should reside in two registers");
3187
3188 // Before splitting the value we cast it to i64
3189 Arg = DAG.getBitcast(MVT::i64, Arg);
3190
3191 // Splitting the value into two i32 types
3192 SDValue Lo, Hi;
3193 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
3194 DAG.getConstant(0, Dl, MVT::i32));
3195 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
3196 DAG.getConstant(1, Dl, MVT::i32));
3197
3198 // Attach the two i32 types into corresponding registers
3199 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
3200 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
3201}
3202
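A scalar analogue of the split performed above, plus the inverse concatenation that getv64i1Argument performs further down; a standalone sketch, not DAG code:

#include <cstdint>
#include <utility>

// View the 64-bit mask as an i64 and peel off the low and high i32 halves
// that go into the two registers.
static std::pair<uint32_t, uint32_t> splitMask64(uint64_t Mask) {
  return {static_cast<uint32_t>(Mask), static_cast<uint32_t>(Mask >> 32)};
}

// The inverse: concatenate the two halves back into the 64-bit mask.
static uint64_t joinMask64(uint32_t Lo, uint32_t Hi) {
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}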
3203SDValue
3204X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3205 bool isVarArg,
3206 const SmallVectorImpl<ISD::OutputArg> &Outs,
3207 const SmallVectorImpl<SDValue> &OutVals,
3208 const SDLoc &dl, SelectionDAG &DAG) const {
3209 MachineFunction &MF = DAG.getMachineFunction();
3210 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3211
3212 // In some cases we need to disable registers from the default CSR list.
3213 // For example, when they are used as return registers (preserve_* and X86's
3214 // regcall) or for argument passing (X86's regcall).
3215 bool ShouldDisableCalleeSavedRegister =
3216 shouldDisableRetRegFromCSR(CallConv) ||
3217 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
3218
3219 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
3220 report_fatal_error("X86 interrupts may not return any value");
3221
3222 SmallVector<CCValAssign, 16> RVLocs;
3223 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
3224 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
3225
3226 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
3227 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
3228 ++I, ++OutsIndex) {
3229 CCValAssign &VA = RVLocs[I];
3230 assert(VA.isRegLoc() && "Can only return in registers!");
3231
3232 // Add the register to the CalleeSaveDisableRegs list.
3233 if (ShouldDisableCalleeSavedRegister)
3234 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
3235
3236 SDValue ValToCopy = OutVals[OutsIndex];
3237 EVT ValVT = ValToCopy.getValueType();
3238
3239 // Promote values to the appropriate types.
3240 if (VA.getLocInfo() == CCValAssign::SExt)
3241 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
3242 else if (VA.getLocInfo() == CCValAssign::ZExt)
3243 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
3244 else if (VA.getLocInfo() == CCValAssign::AExt) {
3245 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
3246 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
3247 else
3248 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
3249 }
3250 else if (VA.getLocInfo() == CCValAssign::BCvt)
3251 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
3252
3253 assert(VA.getLocInfo() != CCValAssign::FPExt &&
3254 "Unexpected FP-extend for return value.");
3255
3256 // Report an error if we have attempted to return a value via an XMM
3257 // register and SSE was disabled.
3258 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3259 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3260 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3261 } else if (!Subtarget.hasSSE2() &&
3262 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3263 ValVT == MVT::f64) {
3264 // When returning a double via an XMM register, report an error if SSE2 is
3265 // not enabled.
3266 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3267 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3268 }
3269
3270 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
3271 // the RET instruction and handled by the FP Stackifier.
3272 if (VA.getLocReg() == X86::FP0 ||
3273 VA.getLocReg() == X86::FP1) {
3274 // If this is a copy from an xmm register to ST(0), use an FPExtend to
3275 // change the value to the FP stack register class.
3276 if (isScalarFPTypeInSSEReg(VA.getValVT()))
3277 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
3278 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3279 // Don't emit a copytoreg.
3280 continue;
3281 }
3282
3283 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
3284 // which is returned in RAX / RDX.
3285 if (Subtarget.is64Bit()) {
3286 if (ValVT == MVT::x86mmx) {
3287 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
3288 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
3289 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
3290 ValToCopy);
3291 // If we don't have SSE2 available, convert to v4f32 so the generated
3292 // register is legal.
3293 if (!Subtarget.hasSSE2())
3294 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
3295 }
3296 }
3297 }
3298
3299 if (VA.needsCustom()) {
3300 assert(VA.getValVT() == MVT::v64i1 &&
3301 "Currently the only custom case is when we split v64i1 to 2 regs");
3302
3303 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
3304 Subtarget);
3305
3306 // Add the second register to the CalleeSaveDisableRegs list.
3307 if (ShouldDisableCalleeSavedRegister)
3308 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
3309 } else {
3310 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3311 }
3312 }
3313
3314 SDValue Flag;
3315 SmallVector<SDValue, 6> RetOps;
3316 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3317 // Operand #1 = Bytes To Pop
3318 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
3319 MVT::i32));
3320
3321 // Copy the result values into the output registers.
3322 for (auto &RetVal : RetVals) {
3323 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
3324 RetOps.push_back(RetVal.second);
3325 continue; // Don't emit a copytoreg.
3326 }
3327
3328 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
3329 Flag = Chain.getValue(1);
3330 RetOps.push_back(
3331 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
3332 }
3333
3334 // Swift calling convention does not require we copy the sret argument
3335 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
3336
3337 // All x86 ABIs require that for returning structs by value we copy
3338 // the sret argument into %rax/%eax (depending on ABI) for the return.
3339 // We saved the argument into a virtual register in the entry block,
3340 // so now we copy the value out and into %rax/%eax.
3341 //
3342 // Checking Function.hasStructRetAttr() here is insufficient because the IR
3343 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
3344 // false, then an sret argument may be implicitly inserted in the SelDAG. In
3345 // either case FuncInfo->setSRetReturnReg() will have been called.
3346 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3347 // When we have both sret and another return value, we should use the
3348 // original Chain stored in RetOps[0], instead of the current Chain updated
3349 // in the above loop. If we only have sret, RetOps[0] equals Chain.
3350
3351 // For the case of sret and another return value, we have
3352 // Chain_0 at the function entry
3353 // Chain_1 = getCopyToReg(Chain_0) in the above loop
3354 // If we use Chain_1 in getCopyFromReg, we will have
3355 // Val = getCopyFromReg(Chain_1)
3356 // Chain_2 = getCopyToReg(Chain_1, Val) from below
3357
3358 // getCopyToReg(Chain_0) will be glued together with
3359 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
3360 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
3361 // Data dependency from Unit B to Unit A due to usage of Val in
3362 // getCopyToReg(Chain_1, Val)
3363 // Chain dependency from Unit A to Unit B
3364
3365 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
3366 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
3367 getPointerTy(MF.getDataLayout()));
3368
3369 Register RetValReg
3370 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
3371 X86::RAX : X86::EAX;
3372 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
3373 Flag = Chain.getValue(1);
3374
3375 // RAX/EAX now acts like a return value.
3376 RetOps.push_back(
3377 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
3378
3379 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
3380 // this however for preserve_most/preserve_all to minimize the number of
3381 // callee-saved registers for these CCs.
3382 if (ShouldDisableCalleeSavedRegister &&
3383 CallConv != CallingConv::PreserveAll &&
3384 CallConv != CallingConv::PreserveMost)
3385 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
3386 }
3387
3388 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3389 const MCPhysReg *I =
3390 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3391 if (I) {
3392 for (; *I; ++I) {
3393 if (X86::GR64RegClass.contains(*I))
3394 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3395 else
3396 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3396)
;
3397 }
3398 }
3399
3400 RetOps[0] = Chain; // Update chain.
3401
3402 // Add the flag if we have it.
3403 if (Flag.getNode())
3404 RetOps.push_back(Flag);
3405
3406 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
3407 if (CallConv == CallingConv::X86_INTR)
3408 opcode = X86ISD::IRET;
3409 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
3410}
3411
3412bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3413 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
3414 return false;
3415
3416 SDValue TCChain = Chain;
3417 SDNode *Copy = *N->use_begin();
3418 if (Copy->getOpcode() == ISD::CopyToReg) {
3419 // If the copy has a glue operand, we conservatively assume it isn't safe to
3420 // perform a tail call.
3421 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3422 return false;
3423 TCChain = Copy->getOperand(0);
3424 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
3425 return false;
3426
3427 bool HasRet = false;
3428 for (const SDNode *U : Copy->uses()) {
3429 if (U->getOpcode() != X86ISD::RET_FLAG)
3430 return false;
3431 // If we are returning more than one value, we can definitely
3432 // not make a tail call; see PR19530.
3433 if (U->getNumOperands() > 4)
3434 return false;
3435 if (U->getNumOperands() == 4 &&
3436 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
3437 return false;
3438 HasRet = true;
3439 }
3440
3441 if (!HasRet)
3442 return false;
3443
3444 Chain = TCChain;
3445 return true;
3446}
3447
3448EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
3449 ISD::NodeType ExtendKind) const {
3450 MVT ReturnMVT = MVT::i32;
3451
3452 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
3453 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
3454 // The ABI does not require i1, i8 or i16 to be extended.
3455 //
3456 // On Darwin, there is code in the wild relying on Clang's old behaviour of
3457 // always extending i8/i16 return values, so keep doing that for now.
3458 // (PR26665).
3459 ReturnMVT = MVT::i8;
3460 }
3461
3462 EVT MinVT = getRegisterType(Context, ReturnMVT);
3463 return VT.bitsLT(MinVT) ? MinVT : VT;
3464}
3465
3466 /// Reads two 32 bit registers and creates a 64 bit mask value.
3467 /// \param VA The current 32 bit value that needs to be assigned.
3468 /// \param NextVA The next 32 bit value that needs to be assigned.
3469 /// \param Root The parent DAG node.
3470 /// \param [in,out] InFlag Represents the SDValue in the parent DAG node used
3471 /// for glue purposes. If the DAG is already using a
3472 /// physical register instead of a virtual one, we glue
3473 /// our new SDValue to the InFlag SDValue.
3474 /// \return a new SDValue that is 64 bits wide.
3475static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
3476 SDValue &Root, SelectionDAG &DAG,
3477 const SDLoc &Dl, const X86Subtarget &Subtarget,
3478 SDValue *InFlag = nullptr) {
3479 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
3480 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
3481 assert(VA.getValVT() == MVT::v64i1 &&
3482 "Expecting first location of 64 bit width type");
3483 assert(NextVA.getValVT() == VA.getValVT() &&
3484 "The locations should have the same type");
3485 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
3486 "The values should reside in two registers");
3487
3488 SDValue Lo, Hi;
3489 SDValue ArgValueLo, ArgValueHi;
3490
3491 MachineFunction &MF = DAG.getMachineFunction();
3492 const TargetRegisterClass *RC = &X86::GR32RegClass;
3493
3494 // Read a 32 bit value from the registers.
3495 if (nullptr == InFlag) {
3496 // When no physical register is present,
3497 // create an intermediate virtual register.
3498 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3499 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3500 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3501 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3502 } else {
3503 // When a physical register is available read the value from it and glue
3504 // the reads together.
3505 ArgValueLo =
3506 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
3507 *InFlag = ArgValueLo.getValue(2);
3508 ArgValueHi =
3509 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
3510 *InFlag = ArgValueHi.getValue(2);
3511 }
3512
3513 // Convert the i32 type into v32i1 type.
3514 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
3515
3516 // Convert the i32 type into v32i1 type.
3517 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
3518
3519 // Concatenate the two values together.
3520 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
3521}
3522
3523 /// Lower a register of various sizes (8/16/32/64) to a mask value of the
3524 /// expected size (v8i1/v16i1/v32i1/v64i1).
3525 /// \returns a DAG node containing the operand after lowering to a mask type.
3526static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
3527 const EVT &ValLoc, const SDLoc &Dl,
3528 SelectionDAG &DAG) {
3529 SDValue ValReturned = ValArg;
3530
3531 if (ValVT == MVT::v1i1)
3532 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3533
3534 if (ValVT == MVT::v64i1) {
3535 // On 32-bit machines, this case is handled by getv64i1Argument.
3536 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
3537 // On 64-bit machines there is no need to truncate the value, only bitcast it.
3538 } else {
3539 MVT maskLen;
3540 switch (ValVT.getSimpleVT().SimpleTy) {
3541 case MVT::v8i1:
3542 maskLen = MVT::i8;
3543 break;
3544 case MVT::v16i1:
3545 maskLen = MVT::i16;
3546 break;
3547 case MVT::v32i1:
3548 maskLen = MVT::i32;
3549 break;
3550 default:
3551 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3551)
;
3552 }
3553
3554 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3555 }
3556 return DAG.getBitcast(ValVT, ValReturned);
3557}
3558
3559/// Lower the result values of a call into the
3560/// appropriate copies out of appropriate physical registers.
3561///
3562SDValue X86TargetLowering::LowerCallResult(
3563 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3564 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3565 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3566 uint32_t *RegMask) const {
3567
3568 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3569 // Assign locations to each value returned by this call.
3570 SmallVector<CCValAssign, 16> RVLocs;
3571 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3572 *DAG.getContext());
3573 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3574
3575 // Copy all of the result registers out of their specified physreg.
3576 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3577 ++I, ++InsIndex) {
3578 CCValAssign &VA = RVLocs[I];
3579 EVT CopyVT = VA.getLocVT();
3580
3581 // In some calling conventions we need to remove the used registers
3582 // from the register mask.
3583 if (RegMask) {
3584 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3585 SubRegs.isValid(); ++SubRegs)
3586 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3587 }
3588
3589 // Report an error if there was an attempt to return FP values via XMM
3590 // registers.
3591 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3592 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3593 if (VA.getLocReg() == X86::XMM1)
3594 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3595 else
3596 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3597 } else if (!Subtarget.hasSSE2() &&
3598 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3599 CopyVT == MVT::f64) {
3600 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3601 if (VA.getLocReg() == X86::XMM1)
3602 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3603 else
3604 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3605 }
3606
3607 // If we prefer to use the value in xmm registers, copy it out as f80 and
3608 // use a truncate to move it from fp stack reg to xmm reg.
3609 bool RoundAfterCopy = false;
3610 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3611 isScalarFPTypeInSSEReg(VA.getValVT())) {
3612 if (!Subtarget.hasX87())
3613 report_fatal_error("X87 register return with X87 disabled");
3614 CopyVT = MVT::f80;
3615 RoundAfterCopy = (CopyVT != VA.getLocVT());
3616 }
3617
3618 SDValue Val;
3619 if (VA.needsCustom()) {
3620 assert(VA.getValVT() == MVT::v64i1 &&
3621 "Currently the only custom case is when we split v64i1 to 2 regs");
3622 Val =
3623 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3624 } else {
3625 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3626 .getValue(1);
3627 Val = Chain.getValue(0);
3628 InFlag = Chain.getValue(2);
3629 }
3630
3631 if (RoundAfterCopy)
3632 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3633 // This truncation won't change the value.
3634 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
3635
3636 if (VA.isExtInLoc()) {
3637 if (VA.getValVT().isVector() &&
3638 VA.getValVT().getScalarType() == MVT::i1 &&
3639 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3640 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3641 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3642 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3643 } else
3644 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3645 }
3646
3647 if (VA.getLocInfo() == CCValAssign::BCvt)
3648 Val = DAG.getBitcast(VA.getValVT(), Val);
3649
3650 InVals.push_back(Val);
3651 }
3652
3653 return Chain;
3654}
3655
3656//===----------------------------------------------------------------------===//
3657// C & StdCall & Fast Calling Convention implementation
3658//===----------------------------------------------------------------------===//
3659 // The StdCall calling convention is the standard for many Windows API
3660 // routines. It differs from the C calling convention only slightly: the
3661 // callee cleans up the stack instead of the caller, and symbols are
3662 // decorated (e.g. _name@N). It doesn't support any vector arguments.
3663 // For info on fast calling convention see Fast Calling Convention (tail call)
3664 // implementation LowerX86_32FastCCCallTo.
3663// For info on fast calling convention see Fast Calling Convention (tail call)
3664// implementation LowerX86_32FastCCCallTo.
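
For illustration only (not part of the analyzed file, and assuming a 32-bit Windows-style target where __stdcall is honoured), a minimal sketch of the callee-cleanup and name-decoration behaviour described above; the identifiers are invented:

extern "C" int __stdcall beep_like(unsigned type); // decorated as _beep_like@4
int call_it() {
  return beep_like(0); // the callee pops its 4 bytes of arguments with "ret 4",
}                      // so the caller does not adjust ESP after the call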
3665
3666/// Determines whether Args, either a set of outgoing arguments to a call, or a
3667/// set of incoming args of a call, contains an sret pointer that the callee
3668/// pops
3669template <typename T>
3670static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
3671 const X86Subtarget &Subtarget) {
3672 // Not C++20 (yet), so no concepts available.
3673 static_assert(std::is_same_v<T, ISD::OutputArg> ||
3674 std::is_same_v<T, ISD::InputArg>,
3675 "requires ISD::OutputArg or ISD::InputArg");
3676
3677 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
3678 // for most compilations.
3679 if (!Subtarget.is32Bit())
3680 return false;
3681
3682 if (Args.empty())
3683 return false;
3684
3685 // Most calls do not have an sret argument, check the arg next.
3686 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
3687 if (!Flags.isSRet() || Flags.isInReg())
3688 return false;
3689
3690 // The MSVC ABI does not pop the sret.
3691 if (Subtarget.getTargetTriple().isOSMSVCRT())
3692 return false;
3693
3694 // MCUs don't pop the sret
3695 if (Subtarget.isTargetMCU())
3696 return false;
3697
3698 // Callee pops argument
3699 return true;
3700}
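
As a hedged illustration of the case hasCalleePopSRet models (assuming an i386 Linux-style target; the type and names below are invented): a struct too large for registers is returned through a hidden sret pointer, which the callee pops on return.

struct Big { long long a, b, c; };     // too large to be returned in registers
Big makeBig() { return Big{1, 2, 3}; } // on such targets the callee typically
                                       // returns with "ret $4", popping the
                                       // hidden sret pointer passed by the caller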
3701
3702/// Make a copy of an aggregate at address specified by "Src" to address
3703/// "Dst" with size and alignment information specified by the specific
3704/// parameter attribute. The copy will be passed as a byval function parameter.
3705static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3706 SDValue Chain, ISD::ArgFlagsTy Flags,
3707 SelectionDAG &DAG, const SDLoc &dl) {
3708 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3709
3710 return DAG.getMemcpy(
3711 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3712 /*isVolatile*/ false, /*AlwaysInline=*/true,
3713 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3714}
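
A rough source-level sketch of the situation CreateCopyOfByValArgument handles, with invented names: an aggregate passed by value, for which the caller must hand the callee its own copy.

struct Options { int flags[16]; };      // passed byval in memory on many x86 ABIs
void configure(Options o);              // the callee may modify its copy freely
void caller(const Options &o) {
  configure(o);                         // lowered to a memcpy of sizeof(Options)
}                                       // bytes into the outgoing argument area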
3715
3716/// Return true if the calling convention is one that we can guarantee TCO for.
3717static bool canGuaranteeTCO(CallingConv::ID CC) {
3718 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3719 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3720 CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
3721}
3722
3723/// Return true if we might ever do TCO for calls with this calling convention.
3724static bool mayTailCallThisCC(CallingConv::ID CC) {
3725 switch (CC) {
3726 // C calling conventions:
3727 case CallingConv::C:
3728 case CallingConv::Win64:
3729 case CallingConv::X86_64_SysV:
3730 // Callee pop conventions:
3731 case CallingConv::X86_ThisCall:
3732 case CallingConv::X86_StdCall:
3733 case CallingConv::X86_VectorCall:
3734 case CallingConv::X86_FastCall:
3735 // Swift:
3736 case CallingConv::Swift:
3737 return true;
3738 default:
3739 return canGuaranteeTCO(CC);
3740 }
3741}
3742
3743/// Return true if the function is being made into a tailcall target by
3744/// changing its ABI.
3745static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3746 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
3747 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
3748}
3749
3750bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3751 if (!CI->isTailCall())
3752 return false;
3753
3754 CallingConv::ID CalleeCC = CI->getCallingConv();
3755 if (!mayTailCallThisCC(CalleeCC))
3756 return false;
3757
3758 return true;
3759}
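
A small sketch of a call that requests the guaranteed-TCO treatment checked above, assuming Clang's [[clang::musttail]] attribute is available; illustrative only, not part of the analyzed file.

int target(int x);
int forwarder(int x) {
  [[clang::musttail]] return target(x); // must be lowered as a tail call (a jump),
}                                       // otherwise the compiler rejects the code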
3760
3761SDValue
3762X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3763 const SmallVectorImpl<ISD::InputArg> &Ins,
3764 const SDLoc &dl, SelectionDAG &DAG,
3765 const CCValAssign &VA,
3766 MachineFrameInfo &MFI, unsigned i) const {
3767 // Create the nodes corresponding to a load from this parameter slot.
3768 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3769 bool AlwaysUseMutable = shouldGuaranteeTCO(
3770 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3771 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3772 EVT ValVT;
3773 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3774
3775 // If the value is passed by pointer, we receive the address instead of the
3776 // value itself. No need to extend if the mask value and location share the
3777 // same absolute size.
3778 bool ExtendedInMem =
3779 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3780 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3781
3782 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3783 ValVT = VA.getLocVT();
3784 else
3785 ValVT = VA.getValVT();
3786
3787 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3788 // changed with more analysis.
3789 // In case of tail call optimization, mark all arguments mutable, since they
3790 // could be overwritten when lowering the arguments of a tail call.
3791 if (Flags.isByVal()) {
3792 unsigned Bytes = Flags.getByValSize();
3793 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3794
3795 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3796 // can be improved with deeper analysis.
3797 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3798 /*isAliased=*/true);
3799 return DAG.getFrameIndex(FI, PtrVT);
3800 }
3801
3802 EVT ArgVT = Ins[i].ArgVT;
3803
3804 // If this is a vector that has been split into multiple parts, and the
3805 // scalar size of the parts doesn't match the vector element size, then we
3806 // can't elide the copy. The parts will have padding between them instead of
3807 // being packed like a vector.
3808 bool ScalarizedAndExtendedVector =
3809 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3810 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3811
3812 // This is an argument in memory. We might be able to perform copy elision.
3813 // If the argument is passed directly in memory without any extension, then we
3814 // can perform copy elision. Large vector types, for example, may be passed
3815 // indirectly by pointer.
3816 if (Flags.isCopyElisionCandidate() &&
3817 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3818 !ScalarizedAndExtendedVector) {
3819 SDValue PartAddr;
3820 if (Ins[i].PartOffset == 0) {
3821 // If this is a one-part value or the first part of a multi-part value,
3822 // create a stack object for the entire argument value type and return a
3823 // load from our portion of it. This assumes that if the first part of an
3824 // argument is in memory, the rest will also be in memory.
3825 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3826 /*IsImmutable=*/false);
3827 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3828 return DAG.getLoad(
3829 ValVT, dl, Chain, PartAddr,
3830 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3831 } else {
3832 // This is not the first piece of an argument in memory. See if there is
3833 // already a fixed stack object including this offset. If so, assume it
3834 // was created by the PartOffset == 0 branch above and create a load from
3835 // the appropriate offset into it.
3836 int64_t PartBegin = VA.getLocMemOffset();
3837 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3838 int FI = MFI.getObjectIndexBegin();
3839 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3840 int64_t ObjBegin = MFI.getObjectOffset(FI);
3841 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3842 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3843 break;
3844 }
3845 if (MFI.isFixedObjectIndex(FI)) {
3846 SDValue Addr =
3847 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3848 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3849 return DAG.getLoad(
3850 ValVT, dl, Chain, Addr,
3851 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3852 Ins[i].PartOffset));
3853 }
3854 }
3855 }
3856
3857 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3858 VA.getLocMemOffset(), isImmutable);
3859
3860 // Set SExt or ZExt flag.
3861 if (VA.getLocInfo() == CCValAssign::ZExt) {
3862 MFI.setObjectZExt(FI, true);
3863 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3864 MFI.setObjectSExt(FI, true);
3865 }
3866
3867 MaybeAlign Alignment;
3868 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
3869 ValVT != MVT::f80)
3870 Alignment = MaybeAlign(4);
3871 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3872 SDValue Val = DAG.getLoad(
3873 ValVT, dl, Chain, FIN,
3874 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3875 Alignment);
3876 return ExtendedInMem
3877 ? (VA.getValVT().isVector()
3878 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3879 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3880 : Val;
3881}
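
For orientation, a hedged example of an argument that ends up in LowerMemArgument on SysV x86-64 (invented function): only the first six integer parameters travel in registers, so the seventh is loaded from a fixed stack object like the ones created above.

long seventh(long a, long b, long c, long d, long e, long f, long g) {
  return g; // a..f arrive in RDI, RSI, RDX, RCX, R8, R9; g has no register and
}           // is loaded from a fixed stack slot in the caller's argument area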
3882
3883// FIXME: Get this from tablegen.
3884static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3885 const X86Subtarget &Subtarget) {
3886 assert(Subtarget.is64Bit());
3887
3888 if (Subtarget.isCallingConvWin64(CallConv)) {
3889 static const MCPhysReg GPR64ArgRegsWin64[] = {
3890 X86::RCX, X86::RDX, X86::R8, X86::R9
3891 };
3892 return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3893 }
3894
3895 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3896 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3897 };
3898 return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3899}
3900
3901// FIXME: Get this from tablegen.
3902static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3903 CallingConv::ID CallConv,
3904 const X86Subtarget &Subtarget) {
3905 assert(Subtarget.is64Bit());
3906 if (Subtarget.isCallingConvWin64(CallConv)) {
3907 // The XMM registers which might contain var arg parameters are shadowed
3908 // in their paired GPR. So we only need to save the GPR to their home
3909 // slots.
3910 // TODO: __vectorcall will change this.
3911 return std::nullopt;
3912 }
3913
3914 bool isSoftFloat = Subtarget.useSoftFloat();
3915 if (isSoftFloat || !Subtarget.hasSSE1())
3916 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3917 // registers.
3918 return std::nullopt;
3919
3920 static const MCPhysReg XMMArgRegs64Bit[] = {
3921 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3922 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3923 };
3924 return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3925}
3926
3927#ifndef NDEBUG
3928static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3929 return llvm::is_sorted(
3930 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3931 return A.getValNo() < B.getValNo();
3932 });
3933}
3934#endif
3935
3936namespace {
3937/// This is a helper class for lowering variable arguments parameters.
3938class VarArgsLoweringHelper {
3939public:
3940 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3941 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3942 CallingConv::ID CallConv, CCState &CCInfo)
3943 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3944 TheMachineFunction(DAG.getMachineFunction()),
3945 TheFunction(TheMachineFunction.getFunction()),
3946 FrameInfo(TheMachineFunction.getFrameInfo()),
3947 FrameLowering(*Subtarget.getFrameLowering()),
3948 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3949 CCInfo(CCInfo) {}
3950
3951 // Lower variable arguments parameters.
3952 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3953
3954private:
3955 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3956
3957 void forwardMustTailParameters(SDValue &Chain);
3958
3959 bool is64Bit() const { return Subtarget.is64Bit(); }
3960 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3961
3962 X86MachineFunctionInfo *FuncInfo;
3963 const SDLoc &DL;
3964 SelectionDAG &DAG;
3965 const X86Subtarget &Subtarget;
3966 MachineFunction &TheMachineFunction;
3967 const Function &TheFunction;
3968 MachineFrameInfo &FrameInfo;
3969 const TargetFrameLowering &FrameLowering;
3970 const TargetLowering &TargLowering;
3971 CallingConv::ID CallConv;
3972 CCState &CCInfo;
3973};
3974} // namespace
3975
3976void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3977 SDValue &Chain, unsigned StackSize) {
3978 // If the function takes a variable number of arguments, make a frame index
3979 // for the start of the first vararg value... for expansion of llvm.va_start.
3980 // We can skip this if there are no va_start calls.
3981 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3982 CallConv != CallingConv::X86_ThisCall)) {
3983 FuncInfo->setVarArgsFrameIndex(
3984 FrameInfo.CreateFixedObject(1, StackSize, true));
3985 }
3986
3987 // 64-bit calling conventions support varargs and register parameters, so we
3988 // have to do extra work to spill them in the prologue.
3989 if (is64Bit()) {
3990 // Find the first unallocated argument registers.
3991 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3992 ArrayRef<MCPhysReg> ArgXMMs =
3993 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3994 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3995 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3996
3997 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3998 "SSE register cannot be used when SSE is disabled!");
3999
4000 if (isWin64()) {
4001 // Get to the caller-allocated home save location. Add 8 to account
4002 // for the return address.
4003 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
4004 FuncInfo->setRegSaveFrameIndex(
4005 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
4006 // Fixup to set vararg frame on shadow area (4 x i64).
4007 if (NumIntRegs < 4)
4008 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
4009 } else {
4010 // For X86-64, if there are vararg parameters that are passed via
4011 // registers, then we must store them to their spots on the stack so
4012 // they may be loaded by dereferencing the result of va_next.
4013 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
4014 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
4015 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
4016 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
4017 }
4018
4019 SmallVector<SDValue, 6>
4020 LiveGPRs; // list of SDValue for GPR registers keeping live input value
4021 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
4022 // keeping live input value
4023 SDValue ALVal; // if applicable keeps SDValue for %al register
4024
4025 // Gather all the live in physical registers.
4026 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
4027 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
4028 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
4029 }
4030 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
4031 if (!AvailableXmms.empty()) {
4032 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
4033 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
4034 for (MCPhysReg Reg : AvailableXmms) {
4035 // FastRegisterAllocator spills virtual registers at basic
4036 // block boundaries. That leads to uses of xmm registers
4037 // outside of the check for %al. Pass physical registers to
4038 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
4039 TheMachineFunction.getRegInfo().addLiveIn(Reg);
4040 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
4041 }
4042 }
4043
4044 // Store the integer parameter registers.
4045 SmallVector<SDValue, 8> MemOps;
4046 SDValue RSFIN =
4047 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
4048 TargLowering.getPointerTy(DAG.getDataLayout()));
4049 unsigned Offset = FuncInfo->getVarArgsGPOffset();
4050 for (SDValue Val : LiveGPRs) {
4051 SDValue FIN = DAG.getNode(ISD::ADD, DL,
4052 TargLowering.getPointerTy(DAG.getDataLayout()),
4053 RSFIN, DAG.getIntPtrConstant(Offset, DL));
4054 SDValue Store =
4055 DAG.getStore(Val.getValue(1), DL, Val, FIN,
4056 MachinePointerInfo::getFixedStack(
4057 DAG.getMachineFunction(),
4058 FuncInfo->getRegSaveFrameIndex(), Offset));
4059 MemOps.push_back(Store);
4060 Offset += 8;
4061 }
4062
4063 // Now store the XMM (fp + vector) parameter registers.
4064 if (!LiveXMMRegs.empty()) {
4065 SmallVector<SDValue, 12> SaveXMMOps;
4066 SaveXMMOps.push_back(Chain);
4067 SaveXMMOps.push_back(ALVal);
4068 SaveXMMOps.push_back(RSFIN);
4069 SaveXMMOps.push_back(
4070 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
4071 llvm::append_range(SaveXMMOps, LiveXMMRegs);
4072 MachineMemOperand *StoreMMO =
4073 DAG.getMachineFunction().getMachineMemOperand(
4074 MachinePointerInfo::getFixedStack(
4075 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
4076 Offset),
4077 MachineMemOperand::MOStore, 128, Align(16));
4078 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
4079 DL, DAG.getVTList(MVT::Other),
4080 SaveXMMOps, MVT::i8, StoreMMO));
4081 }
4082
4083 if (!MemOps.empty())
4084 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4085 }
4086}
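
A purely illustrative consumer of the register save area built above (assuming SysV x86-64): a C-style variadic function whose va_arg reads are satisfied from the spilled GPR slots before falling back to the stack overflow area.

#include <cstdarg>
long sum(int n, ...) {
  va_list ap;
  va_start(ap, n);          // the va_list points into the register save area
  long s = 0;
  for (int i = 0; i < n; ++i)
    s += va_arg(ap, long);  // consumes the spilled RSI, RDX, ... slots first
  va_end(ap);
  return s;
}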
4087
4088void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
4089 // Find the largest legal vector type.
4090 MVT VecVT = MVT::Other;
4091 // FIXME: Only some x86_32 calling conventions support AVX512.
4092 if (Subtarget.useAVX512Regs() &&
4093 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
4094 CallConv == CallingConv::Intel_OCL_BI)))
4095 VecVT = MVT::v16f32;
4096 else if (Subtarget.hasAVX())
4097 VecVT = MVT::v8f32;
4098 else if (Subtarget.hasSSE2())
4099 VecVT = MVT::v4f32;
4100
4101 // We forward some GPRs and some vector types.
4102 SmallVector<MVT, 2> RegParmTypes;
4103 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
4104 RegParmTypes.push_back(IntVT);
4105 if (VecVT != MVT::Other)
4106 RegParmTypes.push_back(VecVT);
4107
4108 // Compute the set of forwarded registers. The rest are scratch.
4109 SmallVectorImpl<ForwardedRegister> &Forwards =
4110 FuncInfo->getForwardedMustTailRegParms();
4111 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
4112
4113 // Forward AL for SysV x86_64 targets, since it is used for varargs.
4114 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
4115 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
4116 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
4117 }
4118
4119 // Copy all forwards from physical to virtual registers.
4120 for (ForwardedRegister &FR : Forwards) {
4121 // FIXME: Can we use a less constrained schedule?
4122 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
4123 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
4124 TargLowering.getRegClassFor(FR.VT));
4125 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
4126 }
4127}
4128
4129void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
4130 unsigned StackSize) {
4131 // Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
4132 // If necessary, it will be set to the correct value later.
4133 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
4134 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4135
4136 if (FrameInfo.hasVAStart())
4137 createVarArgAreaAndStoreRegisters(Chain, StackSize);
4138
4139 if (FrameInfo.hasMustTailInVarArgFunc())
4140 forwardMustTailParameters(Chain);
4141}
4142
4143SDValue X86TargetLowering::LowerFormalArguments(
4144 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4145 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4146 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4147 MachineFunction &MF = DAG.getMachineFunction();
4148 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4149
4150 const Function &F = MF.getFunction();
4151 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
4152 F.getName() == "main")
4153 FuncInfo->setForceFramePointer(true);
4154
4155 MachineFrameInfo &MFI = MF.getFrameInfo();
4156 bool Is64Bit = Subtarget.is64Bit();
4157 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4158
4159 assert(
4160 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
4161 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
4162
4163 // Assign locations to all of the incoming arguments.
4164 SmallVector<CCValAssign, 16> ArgLocs;
4165 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4166
4167 // Allocate shadow area for Win64.
4168 if (IsWin64)
4169 CCInfo.AllocateStack(32, Align(8));
4170
4171 CCInfo.AnalyzeArguments(Ins, CC_X86);
4172
4173 // In vectorcall calling convention a second pass is required for the HVA
4174 // types.
4175 if (CallingConv::X86_VectorCall == CallConv) {
4176 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
4177 }
4178
4179 // The next loop assumes that the locations are in the same order as the
4180 // input arguments.
4181 assert(isSortedByValueNo(ArgLocs) &&
4182 "Argument Location list must be sorted before lowering");
4183
4184 SDValue ArgValue;
4185 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
4186 ++I, ++InsIndex) {
4187 assert(InsIndex < Ins.size() && "Invalid Ins index");
4188 CCValAssign &VA = ArgLocs[I];
4189
4190 if (VA.isRegLoc()) {
4191 EVT RegVT = VA.getLocVT();
4192 if (VA.needsCustom()) {
4193 assert(
4194 VA.getValVT() == MVT::v64i1 &&
4195 "Currently the only custom case is when we split v64i1 to 2 regs");
4196
4197 // In the regcall calling convention, v64i1 values compiled
4198 // for a 32-bit target are split up into two registers.
4199 ArgValue =
4200 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
4201 } else {
4202 const TargetRegisterClass *RC;
4203 if (RegVT == MVT::i8)
4204 RC = &X86::GR8RegClass;
4205 else if (RegVT == MVT::i16)
4206 RC = &X86::GR16RegClass;
4207 else if (RegVT == MVT::i32)
4208 RC = &X86::GR32RegClass;
4209 else if (Is64Bit && RegVT == MVT::i64)
4210 RC = &X86::GR64RegClass;
4211 else if (RegVT == MVT::f16)
4212 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
4213 else if (RegVT == MVT::f32)
4214 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
4215 else if (RegVT == MVT::f64)
4216 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
4217 else if (RegVT == MVT::f80)
4218 RC = &X86::RFP80RegClass;
4219 else if (RegVT == MVT::f128)
4220 RC = &X86::VR128RegClass;
4221 else if (RegVT.is512BitVector())
4222 RC = &X86::VR512RegClass;
4223 else if (RegVT.is256BitVector())
4224 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
4225 else if (RegVT.is128BitVector())
4226 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
4227 else if (RegVT == MVT::x86mmx)
4228 RC = &X86::VR64RegClass;
4229 else if (RegVT == MVT::v1i1)
4230 RC = &X86::VK1RegClass;
4231 else if (RegVT == MVT::v8i1)
4232 RC = &X86::VK8RegClass;
4233 else if (RegVT == MVT::v16i1)
4234 RC = &X86::VK16RegClass;
4235 else if (RegVT == MVT::v32i1)
4236 RC = &X86::VK32RegClass;
4237 else if (RegVT == MVT::v64i1)
4238 RC = &X86::VK64RegClass;
4239 else
4240 llvm_unreachable("Unknown argument type!");
4241
4242 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4243 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4244 }
4245
4246 // If this is an 8 or 16-bit value, it is really passed promoted to 32
4247 // bits. Insert an assert[sz]ext to capture this, then truncate to the
4248 // right size.
4249 if (VA.getLocInfo() == CCValAssign::SExt)
4250 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4251 DAG.getValueType(VA.getValVT()));
4252 else if (VA.getLocInfo() == CCValAssign::ZExt)
4253 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4254 DAG.getValueType(VA.getValVT()));
4255 else if (VA.getLocInfo() == CCValAssign::BCvt)
4256 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
4257
4258 if (VA.isExtInLoc()) {
4259 // Handle MMX values passed in XMM regs.
4260 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
4261 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
4262 else if (VA.getValVT().isVector() &&
4263 VA.getValVT().getScalarType() == MVT::i1 &&
4264 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
4265 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
4266 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
4267 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
4268 } else
4269 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4270 }
4271 } else {
4272 assert(VA.isMemLoc());
4273 ArgValue =
4274 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
4275 }
4276
4277 // If value is passed via pointer - do a load.
4278 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
4279 ArgValue =
4280 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
4281
4282 InVals.push_back(ArgValue);
4283 }
4284
4285 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
4286 if (Ins[I].Flags.isSwiftAsync()) {
4287 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
4288 if (Subtarget.is64Bit())
4289 X86FI->setHasSwiftAsyncContext(true);
4290 else {
4291 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
4292 X86FI->setSwiftAsyncContextFrameIdx(FI);
4293 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
4294 DAG.getFrameIndex(FI, MVT::i32),
4295 MachinePointerInfo::getFixedStack(MF, FI));
4296 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
4297 }
4298 }
4299
4300 // Swift calling convention does not require we copy the sret argument
4301 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
4302 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
4303 continue;
4304
4305 // All x86 ABIs require that for returning structs by value we copy the
4306 // sret argument into %rax/%eax (depending on ABI) for the return. Save
4307 // the argument into a virtual register so that we can access it from the
4308 // return points.
4309 if (Ins[I].Flags.isSRet()) {
4310 assert(!FuncInfo->getSRetReturnReg() &&
4311 "SRet return has already been set");
4312 MVT PtrTy = getPointerTy(DAG.getDataLayout());
4313 Register Reg =
4314 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
4315 FuncInfo->setSRetReturnReg(Reg);
4316 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
4317 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
4318 break;
4319 }
4320 }
4321
4322 unsigned StackSize = CCInfo.getNextStackOffset();
4323 // Align stack specially for tail calls.
4324 if (shouldGuaranteeTCO(CallConv,
4325 MF.getTarget().Options.GuaranteedTailCallOpt))
4326 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
4327
4328 if (IsVarArg)
4329 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
4330 .lowerVarArgsParameters(Chain, StackSize);
4331
4332 // Some CCs need callee pop.
4333 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
4334 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4335 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
4336 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
4337 // X86 interrupts must pop the error code (and the alignment padding) if
4338 // present.
4339 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
4340 } else {
4341 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
4342 // If this is an sret function, the return should pop the hidden pointer.
4343 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
4344 FuncInfo->setBytesToPopOnReturn(4);
4345 }
4346
4347 if (!Is64Bit) {
4348 // RegSaveFrameIndex is X86-64 only.
4349 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4350 }
4351
4352 FuncInfo->setArgumentStackSize(StackSize);
4353
4354 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
4355 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
4356 if (Personality == EHPersonality::CoreCLR) {
4357 assert(Is64Bit);
4358 // TODO: Add a mechanism to frame lowering that will allow us to indicate
4359 // that we'd prefer this slot be allocated towards the bottom of the frame
4360 // (i.e. near the stack pointer after allocating the frame). Every
4361 // funclet needs a copy of this slot in its (mostly empty) frame, and the
4362 // offset from the bottom of this and each funclet's frame must be the
4363 // same, so the size of funclets' (mostly empty) frames is dictated by
4364 // how far this slot is from the bottom (since they allocate just enough
4365 // space to accommodate holding this slot at the correct offset).
4366 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
4367 EHInfo->PSPSymFrameIdx = PSPSymFI;
4368 }
4369 }
4370
4371 if (shouldDisableArgRegFromCSR(CallConv) ||
4372 F.hasFnAttribute("no_caller_saved_registers")) {
4373 MachineRegisterInfo &MRI = MF.getRegInfo();
4374 for (std::pair<Register, Register> Pair : MRI.liveins())
4375 MRI.disableCalleeSavedRegister(Pair.first);
4376 }
4377
4378 return Chain;
4379}
4380
4381SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
4382 SDValue Arg, const SDLoc &dl,
4383 SelectionDAG &DAG,
4384 const CCValAssign &VA,
4385 ISD::ArgFlagsTy Flags,
4386 bool isByVal) const {
4387 unsigned LocMemOffset = VA.getLocMemOffset();
4388 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4389 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4390 StackPtr, PtrOff);
4391 if (isByVal)
4392 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
4393
4394 MaybeAlign Alignment;
4395 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
4396 Arg.getSimpleValueType() != MVT::f80)
4397 Alignment = MaybeAlign(4);
4398 return DAG.getStore(
4399 Chain, dl, Arg, PtrOff,
4400 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
4401 Alignment);
4402}
4403
4404/// Emit a load of return address if tail call
4405/// optimization is performed and it is required.
4406SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
4407 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
4408 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
4409 // Adjust the Return address stack slot.
4410 EVT VT = getPointerTy(DAG.getDataLayout());
4411 OutRetAddr = getReturnAddressFrameIndex(DAG);
4412
4413 // Load the "old" Return address.
4414 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
4415 return SDValue(OutRetAddr.getNode(), 1);
4416}
4417
4418/// Emit a store of the return address if tail call
4419/// optimization is performed and it is required (FPDiff!=0).
4420static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
4421 SDValue Chain, SDValue RetAddrFrIdx,
4422 EVT PtrVT, unsigned SlotSize,
4423 int FPDiff, const SDLoc &dl) {
4424 // Store the return address to the appropriate stack slot.
4425 if (!FPDiff) return Chain;
4426 // Calculate the new stack slot for the return address.
4427 int NewReturnAddrFI =
4428 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
4429 false);
4430 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
4431 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
4432 MachinePointerInfo::getFixedStack(
4433 DAG.getMachineFunction(), NewReturnAddrFI));
4434 return Chain;
4435}
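
A tiny worked example of the offset arithmetic used above, with invented values and an assumed 8-byte slot size: FPDiff is the caller's reserved argument bytes minus the callee's requirement, and the return address is re-stored at FPDiff - SlotSize.

// Mirrors the CreateFixedObject offset computed above (illustrative values only).
int newRetAddrOffset(int BytesCallerPushed, int BytesCalleeNeeds, int SlotSize = 8) {
  int FPDiff = BytesCallerPushed - BytesCalleeNeeds; // e.g. 16 - 32 = -16
  return FPDiff - SlotSize;                          // e.g. -24
}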
4436
4437/// Returns a vector_shuffle mask for an movs{s|d}, movd
4438/// operation of specified width.
4439static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
4440 SDValue V2) {
4441 unsigned NumElems = VT.getVectorNumElements();
4442 SmallVector<int, 8> Mask;
4443 Mask.push_back(NumElems);
4444 for (unsigned i = 1; i != NumElems; ++i)
4445 Mask.push_back(i);
4446 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
4447}
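
A standalone sketch (invented helper, mirroring the loop in getMOVL) of the shuffle mask it builds: lane 0 is taken from V2 and the remaining lanes from V1, matching MOVSS/MOVSD semantics.

#include <vector>
std::vector<int> movlMask(unsigned NumElems) {
  std::vector<int> Mask{static_cast<int>(NumElems)}; // lane 0 comes from V2
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back(i);                               // lanes 1..N-1 come from V1
  return Mask;                                       // v4f32 -> {4, 1, 2, 3}
}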
4448
4449SDValue
4450X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4451 SmallVectorImpl<SDValue> &InVals) const {
4452 SelectionDAG &DAG = CLI.DAG;
4453 SDLoc &dl = CLI.DL;
4454 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4455 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4456 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4457 SDValue Chain = CLI.Chain;
4458 SDValue Callee = CLI.Callee;
4459 CallingConv::ID CallConv = CLI.CallConv;
4460 bool &isTailCall = CLI.IsTailCall;
4461 bool isVarArg = CLI.IsVarArg;
4462 const auto *CB = CLI.CB;
4463
4464 MachineFunction &MF = DAG.getMachineFunction();
4465 bool Is64Bit = Subtarget.is64Bit();
4466 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4467 bool IsSibcall = false;
4468 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
4469 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
4470 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
4471 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
4472 bool HasNCSR = (CB && isa<CallInst>(CB) &&
4473 CB->hasFnAttr("no_caller_saved_registers"));
4474 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
4475 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
4476 bool IsCFICall = IsIndirectCall && CLI.CFIType;
4477 const Module *M = MF.getMMI().getModule();
4478 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
4479
4480 MachineFunction::CallSiteInfo CSInfo;
4481 if (CallConv == CallingConv::X86_INTR)
4482 report_fatal_error("X86 interrupts may not be called directly");
4483
4484 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
4485 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
4486 // If we are using a GOT, disable tail calls to external symbols with
4487 // default visibility. Tail calling such a symbol requires using a GOT
4488 // relocation, which forces early binding of the symbol. This breaks code
4489 // that requires lazy function symbol resolution. Using musttail or
4490 // GuaranteedTailCallOpt will override this.
4491 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4492 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
4493 G->getGlobal()->hasDefaultVisibility()))
4494 isTailCall = false;
4495 }
4496
4497 if (isTailCall && !IsMustTail) {
4498 // Check if it's really possible to do a tail call.
4499 isTailCall = IsEligibleForTailCallOptimization(
4500 Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
4501 Ins, DAG);
4502
4503 // Sibcalls are automatically detected tailcalls which do not require
4504 // ABI changes.
4505 if (!IsGuaranteeTCO && isTailCall)
4506 IsSibcall = true;
4507
4508 if (isTailCall)
4509 ++NumTailCalls;
4510 }
4511
4512 if (IsMustTail && !isTailCall)
4513 report_fatal_error("failed to perform tail call elimination on a call "
4514 "site marked musttail");
4515
4516 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
4517 "Var args not supported with calling convention fastcc, ghc or hipe");
4518
4519 // Analyze operands of the call, assigning locations to each operand.
4520 SmallVector<CCValAssign, 16> ArgLocs;
4521 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4522
4523 // Allocate shadow area for Win64.
4524 if (IsWin64)
4525 CCInfo.AllocateStack(32, Align(8));
4526
4527 CCInfo.AnalyzeArguments(Outs, CC_X86);
4528
4529 // In vectorcall calling convention a second pass is required for the HVA
4530 // types.
4531 if (CallingConv::X86_VectorCall == CallConv) {
4532 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
4533 }
4534
4535 // Get a count of how many bytes are to be pushed on the stack.
4536 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
4537 if (IsSibcall)
4538 // This is a sibcall. The memory operands are already available in the
4539 // caller's incoming argument area (its own caller's stack).
4540 NumBytes = 0;
4541 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
4542 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
4543
4544 int FPDiff = 0;
4545 if (isTailCall &&
4546 shouldGuaranteeTCO(CallConv,
4547 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4548 // Lower arguments at fp - stackoffset + fpdiff.
4549 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
4550
4551 FPDiff = NumBytesCallerPushed - NumBytes;
4552
4553 // Set the delta of movement of the returnaddr stackslot.
4554 // But only set if delta is greater than previous delta.
4555 if (FPDiff < X86Info->getTCReturnAddrDelta())
4556 X86Info->setTCReturnAddrDelta(FPDiff);
4557 }
4558
4559 unsigned NumBytesToPush = NumBytes;
4560 unsigned NumBytesToPop = NumBytes;
4561
4562 // If we have an inalloca argument, all stack space has already been allocated
4563 // for us and is right at the top of the stack. We don't support multiple
4564 // arguments passed in memory when using inalloca.
4565 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4566 NumBytesToPush = 0;
4567 if (!ArgLocs.back().isMemLoc())
4568 report_fatal_error("cannot use inalloca attribute on a register "
4569 "parameter");
4570 if (ArgLocs.back().getLocMemOffset() != 0)
4571 report_fatal_error("any parameter with the inalloca attribute must be "
4572 "the only memory argument");
4573 } else if (CLI.IsPreallocated) {
4574 assert(ArgLocs.back().isMemLoc() &&
4575 "cannot use preallocated attribute on a register "
4576 "parameter");
4577 SmallVector<size_t, 4> PreallocatedOffsets;
4578 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4579 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4580 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4581 }
4582 }
4583 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4584 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4585 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4586 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4587 NumBytesToPush = 0;
4588 }
4589
4590 if (!IsSibcall && !IsMustTail)
4591 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4592 NumBytes - NumBytesToPush, dl);
4593
4594 SDValue RetAddrFrIdx;
4595 // Load return address for tail calls.
4596 if (isTailCall && FPDiff)
4597 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4598 Is64Bit, FPDiff, dl);
4599
4600 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4601 SmallVector<SDValue, 8> MemOpChains;
4602 SDValue StackPtr;
4603
4604 // The next loop assumes that the locations are in the same order as the
4605 // input arguments.
4606 assert(isSortedByValueNo(ArgLocs) &&
4607 "Argument Location list must be sorted before lowering");
4608
4609 // Walk the register/memloc assignments, inserting copies/loads. In the case
4610 // of tail call optimization, arguments are handled later.
4611 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4612 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4613 ++I, ++OutIndex) {
4614 assert(OutIndex < Outs.size() && "Invalid Out index");
4615 // Skip inalloca/preallocated arguments, they have already been written.
4616 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4617 if (Flags.isInAlloca() || Flags.isPreallocated())
4618 continue;
4619
4620 CCValAssign &VA = ArgLocs[I];
4621 EVT RegVT = VA.getLocVT();
4622 SDValue Arg = OutVals[OutIndex];
4623 bool isByVal = Flags.isByVal();
4624
4625 // Promote the value if needed.
4626 switch (VA.getLocInfo()) {
4627 default: llvm_unreachable("Unknown loc info!");
4628 case CCValAssign::Full: break;
4629 case CCValAssign::SExt:
4630 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4631 break;
4632 case CCValAssign::ZExt:
4633 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4634 break;
4635 case CCValAssign::AExt:
4636 if (Arg.getValueType().isVector() &&
4637 Arg.getValueType().getVectorElementType() == MVT::i1)
4638 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4639 else if (RegVT.is128BitVector()) {
4640 // Special case: passing MMX values in XMM registers.
4641 Arg = DAG.getBitcast(MVT::i64, Arg);
4642 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4643 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4644 } else
4645 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4646 break;
4647 case CCValAssign::BCvt:
4648 Arg = DAG.getBitcast(RegVT, Arg);
4649 break;
4650 case CCValAssign::Indirect: {
4651 if (isByVal) {
4652 // Memcpy the argument to a temporary stack slot to prevent
4653 // the caller from seeing any modifications the callee may make
4654 // as guaranteed by the `byval` attribute.
4655 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4656 Flags.getByValSize(),
4657 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4658 SDValue StackSlot =
4659 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4660 Chain =
4661 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4662 // From now on treat this as a regular pointer
4663 Arg = StackSlot;
4664 isByVal = false;
4665 } else {
4666 // Store the argument.
4667 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4668 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4669 Chain = DAG.getStore(
4670 Chain, dl, Arg, SpillSlot,
4671 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4672 Arg = SpillSlot;
4673 }
4674 break;
4675 }
4676 }
4677
4678 if (VA.needsCustom()) {
4679 assert(VA.getValVT() == MVT::v64i1 &&
4680 "Currently the only custom case is when we split v64i1 to 2 regs");
4681 // Split v64i1 value into two registers
4682 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4683 } else if (VA.isRegLoc()) {
4684 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4685 const TargetOptions &Options = DAG.getTarget().Options;
4686 if (Options.EmitCallSiteInfo)
4687 CSInfo.emplace_back(VA.getLocReg(), I);
4688 if (isVarArg && IsWin64) {
4689 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4690 // shadow reg if callee is a varargs function.
4691 Register ShadowReg;
4692 switch (VA.getLocReg()) {
4693 case X86::XMM0: ShadowReg = X86::RCX; break;
4694 case X86::XMM1: ShadowReg = X86::RDX; break;
4695 case X86::XMM2: ShadowReg = X86::R8; break;
4696 case X86::XMM3: ShadowReg = X86::R9; break;
4697 }
4698 if (ShadowReg)
4699 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4700 }
4701 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4702 assert(VA.isMemLoc());
4703 if (!StackPtr.getNode())
4704 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4705 getPointerTy(DAG.getDataLayout()));
4706 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4707 dl, DAG, VA, Flags, isByVal));
4708 }
4709 }
4710
4711 if (!MemOpChains.empty())
4712 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4713
4714 if (Subtarget.isPICStyleGOT()) {
4715 // ELF / PIC requires the GOT pointer to be in the EBX register before
4716 // function calls made via the PLT (except for regcall).
4717 if (!isTailCall) {
4718 // An indirect call with the RegCall calling convention may use up all the
4719 // general registers, so it is not suitable to bind the EBX register to the
4720 // GOT address; just let the register allocator handle it.
4721 if (CallConv != CallingConv::X86_RegCall)
4722 RegsToPass.push_back(std::make_pair(
4723 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4724 getPointerTy(DAG.getDataLayout()))));
4725 } else {
4726 // If we are tail calling and generating PIC/GOT style code load the
4727 // address of the callee into ECX. The value in ecx is used as target of
4728 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4729 // for tail calls on PIC/GOT architectures. Normally we would just put the
4730 // address of GOT into ebx and then call target@PLT. But for tail calls
4731 // ebx would be restored (since ebx is callee saved) before jumping to the
4732 // target@PLT.
4733
4734 // Note: The actual moving to ECX is done further down.
4735 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4736 if (G && !G->getGlobal()->hasLocalLinkage() &&
4737 G->getGlobal()->hasDefaultVisibility())
4738 Callee = LowerGlobalAddress(Callee, DAG);
4739 else if (isa<ExternalSymbolSDNode>(Callee))
4740 Callee = LowerExternalSymbol(Callee, DAG);
4741 }
4742 }
4743
4744 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
4745 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
4746 // From AMD64 ABI document:
4747 // For calls that may call functions that use varargs or stdargs
4748 // (prototype-less calls or calls to functions containing ellipsis (...) in
4749 // the declaration) %al is used as a hidden argument to specify the number
4750 // of SSE registers used. The contents of %al do not need to match exactly
4751 // the number of registers, but must be an upper bound on the number of SSE
4752 // registers used and must be in the range 0 - 8 inclusive.
4753
4754 // Count the number of XMM registers allocated.
4755 static const MCPhysReg XMMArgRegs[] = {
4756 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4757 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4758 };
4759 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4760 assert((Subtarget.hasSSE1() || !NumXMMRegs)
4761 && "SSE registers cannot be used when SSE is disabled");
4762 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4763 DAG.getConstant(NumXMMRegs, dl,
4764 MVT::i8)));
4765 }
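
Illustrative only (assuming the SysV x86-64 ABI rule quoted in the comment above): a variadic call with one floating-point argument, for which this lowering materializes "mov $1, %al" immediately before the call.

#include <cstdio>
void demo(const char *name) {
  std::printf("%s %f\n", name, 3.14); // one XMM argument, so %al is set to 1;
}                                     // any upper bound up to 8 would also be legal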
4766
4767 if (isVarArg && IsMustTail) {
4768 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4769 for (const auto &F : Forwards) {
4770 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4771 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4772 }
4773 }
4774
4775 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4776 // don't need this because the eligibility check rejects calls that require
4777 // shuffling arguments passed in memory.
4778 if (!IsSibcall && isTailCall) {
4779 // Force all the incoming stack arguments to be loaded from the stack
4780 // before any new outgoing arguments are stored to the stack, because the
4781 // outgoing stack slots may alias the incoming argument stack slots, and
4782 // the alias isn't otherwise explicit. This is slightly more conservative
4783 // than necessary, because it means that each store effectively depends
4784 // on every argument instead of just those arguments it would clobber.
4785 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4786
4787 SmallVector<SDValue, 8> MemOpChains2;
4788 SDValue FIN;
4789 int FI = 0;
4790 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4791 ++I, ++OutsIndex) {
4792 CCValAssign &VA = ArgLocs[I];
4793
4794 if (VA.isRegLoc()) {
4795 if (VA.needsCustom()) {
4796 assert((CallConv == CallingConv::X86_RegCall) &&
4797 "Expecting custom case only in regcall calling convention");
4798          // This means that we are in the special case where one argument was
4799          // passed through two register locations - skip the next location.
4800 ++I;
4801 }
4802
4803 continue;
4804 }
4805
4806      assert(VA.isMemLoc());
4807 SDValue Arg = OutVals[OutsIndex];
4808 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4809 // Skip inalloca/preallocated arguments. They don't require any work.
4810 if (Flags.isInAlloca() || Flags.isPreallocated())
4811 continue;
4812 // Create frame index.
4813 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4814 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4815 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4816 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4817
4818 if (Flags.isByVal()) {
4819 // Copy relative to framepointer.
4820 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4821 if (!StackPtr.getNode())
4822 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4823 getPointerTy(DAG.getDataLayout()));
4824 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4825 StackPtr, Source);
4826
4827 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4828 ArgChain,
4829 Flags, DAG, dl));
4830 } else {
4831 // Store relative to framepointer.
4832 MemOpChains2.push_back(DAG.getStore(
4833 ArgChain, dl, Arg, FIN,
4834 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4835 }
4836 }
4837
4838 if (!MemOpChains2.empty())
4839 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4840
4841 // Store the return address to the appropriate stack slot.
4842 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4843 getPointerTy(DAG.getDataLayout()),
4844 RegInfo->getSlotSize(), FPDiff, dl);
4845 }
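  // [Editorial sketch - not part of X86ISelLowering.cpp] Why the stack-argument
  // token factor above is needed: with guaranteed TCO, a hypothetical caller
  // f(a, b, ...) tail-calling g(b, a, ...) with some arguments passed in memory
  // may have to write b into the outgoing slot that currently holds the incoming
  // a. Chaining every outgoing store after ArgChain forces the incoming loads to
  // complete first, so overlapping slots cannot be clobbered before being read.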
4846
4847 // Build a sequence of copy-to-reg nodes chained together with token chain
4848 // and flag operands which copy the outgoing args into registers.
4849 SDValue InFlag;
4850 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4851 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4852 RegsToPass[i].second, InFlag);
4853 InFlag = Chain.getValue(1);
4854 }
4855
4856 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4857    assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
4858 // In the 64-bit large code model, we have to make all calls
4859 // through a register, since the call instruction's 32-bit
4860 // pc-relative offset may not be large enough to hold the whole
4861 // address.
4862 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4863 Callee->getOpcode() == ISD::ExternalSymbol) {
4864 // Lower direct calls to global addresses and external symbols. Setting
4865 // ForCall to true here has the effect of removing WrapperRIP when possible
4866 // to allow direct calls to be selected without first materializing the
4867 // address into a register.
4868 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4869 } else if (Subtarget.isTarget64BitILP32() &&
4870 Callee.getValueType() == MVT::i32) {
4871    // Zero-extend the 32-bit Callee address to 64 bits, as required by the x32 ABI.
4872 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4873 }
4874
4875 // Returns a chain & a flag for retval copy to use.
4876 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4877 SmallVector<SDValue, 8> Ops;
4878
4879 if (!IsSibcall && isTailCall && !IsMustTail) {
4880 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InFlag, dl);
4881 InFlag = Chain.getValue(1);
4882 }
4883
4884 Ops.push_back(Chain);
4885 Ops.push_back(Callee);
4886
4887 if (isTailCall)
4888 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4889
4890 // Add argument registers to the end of the list so that they are known live
4891 // into the call.
4892 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4893 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4894 RegsToPass[i].second.getValueType()));
4895
4896 // Add a register mask operand representing the call-preserved registers.
4897 const uint32_t *Mask = [&]() {
4898 auto AdaptedCC = CallConv;
4899 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
4900 // use X86_INTR calling convention because it has the same CSR mask
4901 // (same preserved registers).
4902 if (HasNCSR)
4903 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
4904    // If NoCalleeSavedRegisters is requested, then use GHC, since it happens
4905 // to use the CSR_NoRegs_RegMask.
4906 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
4907 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
4908 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
4909 }();
4910  assert(Mask && "Missing call preserved mask for calling convention");
4911
4912 // If this is an invoke in a 32-bit function using a funclet-based
4913 // personality, assume the function clobbers all registers. If an exception
4914 // is thrown, the runtime will not restore CSRs.
4915 // FIXME: Model this more precisely so that we can register allocate across
4916 // the normal edge and spill and fill across the exceptional edge.
4917 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4918 const Function &CallerFn = MF.getFunction();
4919 EHPersonality Pers =
4920 CallerFn.hasPersonalityFn()
4921 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4922 : EHPersonality::Unknown;
4923 if (isFuncletEHPersonality(Pers))
4924 Mask = RegInfo->getNoPreservedMask();
4925 }
4926
4927 // Define a new register mask from the existing mask.
4928 uint32_t *RegMask = nullptr;
4929
4930 // In some calling conventions we need to remove the used physical registers
4931 // from the reg mask. Create a new RegMask for such calling conventions.
4932 // RegMask for calling conventions that disable only return registers (e.g.
4933 // preserve_most) will be modified later in LowerCallResult.
4934 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
4935 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
4936 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4937
4938 // Allocate a new Reg Mask and copy Mask.
4939 RegMask = MF.allocateRegMask();
4940 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4941 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4942
4943 // Make sure all sub registers of the argument registers are reset
4944 // in the RegMask.
4945 if (ShouldDisableArgRegs) {
4946 for (auto const &RegPair : RegsToPass)
4947 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4948 SubRegs.isValid(); ++SubRegs)
4949 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4950 }
4951
4952 // Create the RegMask Operand according to our updated mask.
4953 Ops.push_back(DAG.getRegisterMask(RegMask));
4954 } else {
4955 // Create the RegMask Operand according to the static mask.
4956 Ops.push_back(DAG.getRegisterMask(Mask));
4957 }
4958
4959 if (InFlag.getNode())
4960 Ops.push_back(InFlag);
4961
4962 if (isTailCall) {
4963 // We used to do:
4964 //// If this is the first return lowered for this function, add the regs
4965 //// to the liveout set for the function.
4966 // This isn't right, although it's probably harmless on x86; liveouts
4967 // should be computed from returns not tail calls. Consider a void
4968 // function making a tail call to a function returning int.
4969 MF.getFrameInfo().setHasTailCall();
4970 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4971
4972 if (IsCFICall)
4973 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
4974
4975 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4976 return Ret;
4977 }
4978
4979 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4980 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4981 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4982 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
4983 // expanded to the call, directly followed by a special marker sequence and
4984 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
4985    assert(!isTailCall &&
4986           "tail calls cannot be marked with clang.arc.attachedcall");
4987    assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
4988
4989 // Add a target global address for the retainRV/claimRV runtime function
4990 // just before the call target.
4991 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
4992 auto PtrVT = getPointerTy(DAG.getDataLayout());
4993 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
4994 Ops.insert(Ops.begin() + 1, GA);
4995 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
4996 } else {
4997 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4998 }
4999
5000 if (IsCFICall)
5001 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
5002
5003 InFlag = Chain.getValue(1);
5004 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5005 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
5006
5007 // Save heapallocsite metadata.
5008 if (CLI.CB)
5009 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
5010 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
5011
5012 // Create the CALLSEQ_END node.
5013 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
5014 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
5015 DAG.getTarget().Options.GuaranteedTailCallOpt))
5016 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
5017 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
5018 // If this call passes a struct-return pointer, the callee
5019 // pops that struct pointer.
5020 NumBytesForCalleeToPop = 4;
5021
5022 // Returns a flag for retval copy to use.
5023 if (!IsSibcall) {
5024 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
5025 InFlag, dl);
5026 InFlag = Chain.getValue(1);
5027 }
5028
5029 // Handle result values, copying them out of physregs into vregs that we
5030 // return.
5031 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
5032 InVals, RegMask);
5033}
5034
5035//===----------------------------------------------------------------------===//
5036// Fast Calling Convention (tail call) implementation
5037//===----------------------------------------------------------------------===//
5038
5039 // Like the stdcall convention, the callee cleans up the arguments, except that
5040 // ECX is reserved for storing the tail-called function address. Only 2 registers
5041 // are free for argument passing (inreg). Tail call optimization is performed
5042 // provided:
5043 // * tailcallopt is enabled
5044 // * caller/callee are fastcc
5045 // On the X86_64 architecture with GOT-style position independent code, only
5046 // local (within-module) calls are supported at the moment.
5047 // To keep the stack aligned according to the platform ABI, the function
5048 // GetAlignedArgumentStackSize ensures that the argument delta is always a
5049 // multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld
5050 // for example.) If a tail-called callee has more arguments than the caller, the
5051 // caller needs to make sure that there is room to move the RETADDR to. This is
5052 // achieved by reserving an area the size of the argument delta right after the
5053 // original RETADDR, but before the saved frame pointer or the spilled registers,
5054 // e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4):
5055// stack layout:
5056// arg1
5057// arg2
5058// RETADDR
5059// [ new RETADDR
5060// move area ]
5061// (possible EBP)
5062// ESI
5063// EDI
5064// local1 ..
5065
5066 /// Align the stack size, e.g. to 16n + 12, to satisfy a 16-byte alignment
5067 /// requirement.
5068unsigned
5069X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
5070 SelectionDAG &DAG) const {
5071 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
5072 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
5073  assert(StackSize % SlotSize == 0 &&
5074         "StackSize must be a multiple of SlotSize");
5075 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
5076}
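// [Editorial sketch - not part of X86ISelLowering.cpp] The arithmetic above,
// worked through with assumed 32-bit values SlotSize == 4 and
// StackAlignment == 16: alignTo(StackSize + SlotSize, 16) - SlotSize produces
// the documented "16n + 12" shape, leaving room for the pushed return address.
static_assert((8 + 4 + 15) / 16 * 16 - 4 == 12, "StackSize 8  -> 12");
static_assert((20 + 4 + 15) / 16 * 16 - 4 == 28, "StackSize 20 -> 28 (16*1+12)");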
5077
5078/// Return true if the given stack call argument is already available in the
5079/// same position (relatively) of the caller's incoming argument stack.
5080static
5081bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
5082 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
5083 const X86InstrInfo *TII, const CCValAssign &VA) {
5084 unsigned Bytes = Arg.getValueSizeInBits() / 8;
5085
5086 for (;;) {
5087 // Look through nodes that don't alter the bits of the incoming value.
5088 unsigned Op = Arg.getOpcode();
5089 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
5090 Arg = Arg.getOperand(0);
5091 continue;
5092 }
5093 if (Op == ISD::TRUNCATE) {
5094 const SDValue &TruncInput = Arg.getOperand(0);
5095 if (TruncInput.getOpcode() == ISD::AssertZext &&
5096 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
5097 Arg.getValueType()) {
5098 Arg = TruncInput.getOperand(0);
5099 continue;
5100 }
5101 }
5102 break;
5103 }
5104
5105  int FI = INT_MAX;
5106 if (Arg.getOpcode() == ISD::CopyFromReg) {
5107 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
5108 if (!VR.isVirtual())
5109 return false;
5110 MachineInstr *Def = MRI->getVRegDef(VR);
5111 if (!Def)
5112 return false;
5113 if (!Flags.isByVal()) {
5114 if (!TII->isLoadFromStackSlot(*Def, FI))
5115 return false;
5116 } else {
5117 unsigned Opcode = Def->getOpcode();
5118 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
5119 Opcode == X86::LEA64_32r) &&
5120 Def->getOperand(1).isFI()) {
5121 FI = Def->getOperand(1).getIndex();
5122 Bytes = Flags.getByValSize();
5123 } else
5124 return false;
5125 }
5126 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
5127 if (Flags.isByVal())
5128 // ByVal argument is passed in as a pointer but it's now being
5129 // dereferenced. e.g.
5130 // define @foo(%struct.X* %A) {
5131 // tail call @bar(%struct.X* byval %A)
5132 // }
5133 return false;
5134 SDValue Ptr = Ld->getBasePtr();
5135 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
5136 if (!FINode)
5137 return false;
5138 FI = FINode->getIndex();
5139 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
5140 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
5141 FI = FINode->getIndex();
5142 Bytes = Flags.getByValSize();
5143 } else
5144 return false;
5145
5146  assert(FI != INT_MAX);
5147 if (!MFI.isFixedObjectIndex(FI))
5148 return false;
5149
5150 if (Offset != MFI.getObjectOffset(FI))
5151 return false;
5152
5153 // If this is not byval, check that the argument stack object is immutable.
5154 // inalloca and argument copy elision can create mutable argument stack
5155 // objects. Byval objects can be mutated, but a byval call intends to pass the
5156 // mutated memory.
5157 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
5158 return false;
5159
5160 if (VA.getLocVT().getFixedSizeInBits() >
5161 Arg.getValueSizeInBits().getFixedValue()) {
5162 // If the argument location is wider than the argument type, check that any
5163 // extension flags match.
5164 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
5165 Flags.isSExt() != MFI.isObjectSExt(FI)) {
5166 return false;
5167 }
5168 }
5169
5170 return Bytes == MFI.getObjectSize(FI);
5171}
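// [Editorial sketch - not part of X86ISelLowering.cpp] Example of the case
// MatchingStackOffset accepts, for an assumed 32-bit cdecl caller:
//   int f(int x) { return g(x); }   // x arrives in the caller's fixed slot
// The outgoing argument for g is a load from the caller's own fixed, immutable
// stack object at the same offset and of the same size, so no copy is needed
// and the sibcall eligibility check below can keep the call as a sibcall.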
5172
5173/// Check whether the call is eligible for tail call optimization. Targets
5174/// that want to do tail call optimization should implement this function.
5175bool X86TargetLowering::IsEligibleForTailCallOptimization(
5176 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
5177 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
5178 const SmallVectorImpl<SDValue> &OutVals,
5179 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5180 if (!mayTailCallThisCC(CalleeCC))
5181 return false;
5182
5183 // If -tailcallopt is specified, make fastcc functions tail-callable.
5184 MachineFunction &MF = DAG.getMachineFunction();
5185 const Function &CallerF = MF.getFunction();
5186
5187 // If the function return type is x86_fp80 and the callee return type is not,
5188 // then the FP_EXTEND of the call result is not a nop. It's not safe to
5189 // perform a tailcall optimization here.
5190 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
5191 return false;
5192
5193 CallingConv::ID CallerCC = CallerF.getCallingConv();
5194 bool CCMatch = CallerCC == CalleeCC;
5195 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
5196 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
5197 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
5198 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
5199
5200 // Win64 functions have extra shadow space for argument homing. Don't do the
5201 // sibcall if the caller and callee have mismatched expectations for this
5202 // space.
5203 if (IsCalleeWin64 != IsCallerWin64)
5204 return false;
5205
5206 if (IsGuaranteeTCO) {
5207 if (canGuaranteeTCO(CalleeCC) && CCMatch)
5208 return true;
5209 return false;
5210 }
5211
5212 // Look for obvious safe cases to perform tail call optimization that do not
5213 // require ABI changes. This is what gcc calls sibcall.
5214
5215 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
5216 // emit a special epilogue.
5217 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5218 if (RegInfo->hasStackRealignment(MF))
5219 return false;
5220
5221 // Also avoid sibcall optimization if we're an sret return fn and the callee
5222 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
5223 // insufficient.
5224 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
5225 // For a compatible tail call the callee must return our sret pointer. So it
5226 // needs to be (a) an sret function itself and (b) we pass our sret as its
5227 // sret. Condition #b is harder to determine.
5228 return false;
5229 } else if (IsCalleePopSRet)
5230 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
5231 // expect that.
5232 return false;
5233
5234 // Do not sibcall optimize vararg calls unless all arguments are passed via
5235 // registers.
5236 LLVMContext &C = *DAG.getContext();
5237 if (isVarArg && !Outs.empty()) {
5238 // Optimizing for varargs on Win64 is unlikely to be safe without
5239 // additional testing.
5240 if (IsCalleeWin64 || IsCallerWin64)
5241 return false;
5242
5243 SmallVector<CCValAssign, 16> ArgLocs;
5244 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5245
5246 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
5247 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
5248 if (!ArgLocs[i].isRegLoc())
5249 return false;
5250 }
5251
5252 // If the call result is in ST0 / ST1, it needs to be popped off the x87
5253 // stack. Therefore, if it's not used by the call it is not safe to optimize
5254 // this into a sibcall.
5255 bool Unused = false;
5256 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5257 if (!Ins[i].Used) {
5258 Unused = true;
5259 break;
5260 }
5261 }
5262 if (Unused) {
5263 SmallVector<CCValAssign, 16> RVLocs;
5264 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
5265 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
5266 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5267 CCValAssign &VA = RVLocs[i];
5268 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
5269 return false;
5270 }
5271 }
5272
5273 // Check that the call results are passed in the same way.
5274 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5275 RetCC_X86, RetCC_X86))
5276 return false;
5277 // The callee has to preserve all registers the caller needs to preserve.
5278 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
5279 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5280 if (!CCMatch) {
5281 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5282 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5283 return false;
5284 }
5285
5286 unsigned StackArgsSize = 0;
5287
5288 // If the callee takes no arguments then go on to check the results of the
5289 // call.
5290 if (!Outs.empty()) {
5291 // Check if stack adjustment is needed. For now, do not do this if any
5292 // argument is passed on the stack.
5293 SmallVector<CCValAssign, 16> ArgLocs;
5294 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5295
5296 // Allocate shadow area for Win64
5297 if (IsCalleeWin64)
5298 CCInfo.AllocateStack(32, Align(8));
5299
5300 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
5301 StackArgsSize = CCInfo.getNextStackOffset();
5302
5303 if (CCInfo.getNextStackOffset()) {
5304 // Check if the arguments are already laid out in the right way as
5305 // the caller's fixed stack objects.
5306 MachineFrameInfo &MFI = MF.getFrameInfo();
5307 const MachineRegisterInfo *MRI = &MF.getRegInfo();
5308 const X86InstrInfo *TII = Subtarget.getInstrInfo();
5309 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5310 CCValAssign &VA = ArgLocs[i];
5311 SDValue Arg = OutVals[i];
5312 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5313 if (VA.getLocInfo() == CCValAssign::Indirect)
5314 return false;
5315 if (!VA.isRegLoc()) {
5316 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
5317 MFI, MRI, TII, VA))
5318 return false;
5319 }
5320 }
5321 }
5322
5323 bool PositionIndependent = isPositionIndependent();
5324 // If the tailcall address may be in a register, then make sure it's
5325 // possible to register allocate for it. In 32-bit, the call address can
5326 // only target EAX, EDX, or ECX since the tail call must be scheduled after
5327 // callee-saved registers are restored. These happen to be the same
5328 // registers used to pass 'inreg' arguments so watch out for those.
5329 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
5330 !isa<ExternalSymbolSDNode>(Callee)) ||
5331 PositionIndependent)) {
5332 unsigned NumInRegs = 0;
5333 // In PIC we need an extra register to formulate the address computation
5334 // for the callee.
5335 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
5336
5337 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5338 CCValAssign &VA = ArgLocs[i];
5339 if (!VA.isRegLoc())
5340 continue;
5341 Register Reg = VA.getLocReg();
5342 switch (Reg) {
5343 default: break;
5344 case X86::EAX: case X86::EDX: case X86::ECX:
5345 if (++NumInRegs == MaxInRegs)
5346 return false;
5347 break;
5348 }
5349 }
5350 }
5351
5352 const MachineRegisterInfo &MRI = MF.getRegInfo();
5353 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5354 return false;
5355 }
5356
5357 bool CalleeWillPop =
5358 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
5359 MF.getTarget().Options.GuaranteedTailCallOpt);
5360
5361 if (unsigned BytesToPop =
5362 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
5363 // If we have bytes to pop, the callee must pop them.
5364 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
5365 if (!CalleePopMatches)
5366 return false;
5367 } else if (CalleeWillPop && StackArgsSize > 0) {
5368 // If we don't have bytes to pop, make sure the callee doesn't pop any.
5369 return false;
5370 }
5371
5372 return true;
5373}
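// [Editorial sketch - not part of X86ISelLowering.cpp] The GuaranteedTCO path
// above, expressed as hypothetical IR (assuming -tailcallopt):
//   define fastcc i32 @caller(i32 %x) {
//     %r = tail call fastcc i32 @callee(i32 %x)
//     ret i32 %r
//   }
// Caller and callee are both fastcc (CCMatch) and fastcc can guarantee TCO, so
// the function returns true before any of the sibcall-specific checks run.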
5374
5375FastISel *
5376X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
5377 const TargetLibraryInfo *libInfo) const {
5378 return X86::createFastISel(funcInfo, libInfo);
5379}
5380
5381//===----------------------------------------------------------------------===//
5382// Other Lowering Hooks
5383//===----------------------------------------------------------------------===//
5384
5385bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
5386 bool AssumeSingleUse) {
5387 if (!AssumeSingleUse && !Op.hasOneUse())
5388 return false;
5389 if (!ISD::isNormalLoad(Op.getNode()))
5390 return false;
5391
5392 // If this is an unaligned vector, make sure the target supports folding it.
5393 auto *Ld = cast<LoadSDNode>(Op.getNode());
5394 if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
5395 Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
5396 return false;
5397
5398 // TODO: If this is a non-temporal load and the target has an instruction
5399 // for it, it should not be folded. See "useNonTemporalLoad()".
5400
5401 return true;
5402}
5403
5404bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
5405 const X86Subtarget &Subtarget,
5406 bool AssumeSingleUse) {
5407  assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
5408 if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
5409 return false;
5410
5411  // We cannot replace a wide volatile load with a broadcast-from-memory,
5412  // because that would narrow the load, which isn't legal for volatiles.
5413 auto *Ld = cast<LoadSDNode>(Op.getNode());
5414 return !Ld->isVolatile() ||
5415 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
5416}
5417
5418bool X86::mayFoldIntoStore(SDValue Op) {
5419 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
5420}
5421
5422bool X86::mayFoldIntoZeroExtend(SDValue Op) {
5423 if (Op.hasOneUse()) {
5424 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
5425 return (ISD::ZERO_EXTEND == Opcode);
5426 }
5427 return false;
5428}
5429
5430static bool isTargetShuffle(unsigned Opcode) {
5431 switch(Opcode) {
5432 default: return false;
5433 case X86ISD::BLENDI:
5434 case X86ISD::PSHUFB:
5435 case X86ISD::PSHUFD:
5436 case X86ISD::PSHUFHW:
5437 case X86ISD::PSHUFLW:
5438 case X86ISD::SHUFP:
5439 case X86ISD::INSERTPS:
5440 case X86ISD::EXTRQI:
5441 case X86ISD::INSERTQI:
5442 case X86ISD::VALIGN:
5443 case X86ISD::PALIGNR:
5444 case X86ISD::VSHLDQ:
5445 case X86ISD::VSRLDQ:
5446 case X86ISD::MOVLHPS:
5447 case X86ISD::MOVHLPS:
5448 case X86ISD::MOVSHDUP:
5449 case X86ISD::MOVSLDUP:
5450 case X86ISD::MOVDDUP:
5451 case X86ISD::MOVSS:
5452 case X86ISD::MOVSD:
5453 case X86ISD::MOVSH:
5454 case X86ISD::UNPCKL:
5455 case X86ISD::UNPCKH:
5456 case X86ISD::VBROADCAST:
5457 case X86ISD::VPERMILPI:
5458 case X86ISD::VPERMILPV:
5459 case X86ISD::VPERM2X128:
5460 case X86ISD::SHUF128:
5461 case X86ISD::VPERMIL2:
5462 case X86ISD::VPERMI:
5463 case X86ISD::VPPERM:
5464 case X86ISD::VPERMV:
5465 case X86ISD::VPERMV3:
5466 case X86ISD::VZEXT_MOVL:
5467 return true;
5468 }
5469}
5470
5471static bool isTargetShuffleVariableMask(unsigned Opcode) {
5472 switch (Opcode) {
5473 default: return false;
5474 // Target Shuffles.
5475 case X86ISD::PSHUFB:
5476 case X86ISD::VPERMILPV:
5477 case X86ISD::VPERMIL2:
5478 case X86ISD::VPPERM:
5479 case X86ISD::VPERMV:
5480 case X86ISD::VPERMV3:
5481 return true;
5482 // 'Faux' Target Shuffles.
5483 case ISD::OR:
5484 case ISD::AND:
5485 case X86ISD::ANDNP:
5486 return true;
5487 }
5488}
5489
5490SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
5491 MachineFunction &MF = DAG.getMachineFunction();
5492 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5493 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
5494 int ReturnAddrIndex = FuncInfo->getRAIndex();
5495
5496 if (ReturnAddrIndex == 0) {
5497 // Set up a frame object for the return address.
5498 unsigned SlotSize = RegInfo->getSlotSize();
5499 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
5500 -(int64_t)SlotSize,
5501 false);
5502 FuncInfo->setRAIndex(ReturnAddrIndex);
5503 }
5504
5505 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
5506}
5507
5508bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
5509 bool hasSymbolicDisplacement) {
5510 // Offset should fit into 32 bit immediate field.
5511 if (!isInt<32>(Offset))
5512 return false;
5513
5514 // If we don't have a symbolic displacement - we don't have any extra
5515 // restrictions.
5516 if (!hasSymbolicDisplacement)
5517 return true;
5518
5519 // FIXME: Some tweaks might be needed for medium code model.
5520 if (M != CodeModel::Small && M != CodeModel::Kernel)
5521 return false;
5522
5523  // For the small code model we assume that the latest object ends 16MB before
5524  // the 31-bit boundary. We may also accept fairly large negative constants,
5525  // knowing that all objects are in the positive half of the address space.
5526 if (M == CodeModel::Small && Offset < 16*1024*1024)
5527 return true;
5528
5529  // For the kernel code model we know that all objects reside in the negative
5530  // half of the 32-bit address space. We must not accept negative offsets, since
5531  // they may be just out of range, and we may accept fairly large positive ones.
5532 if (M == CodeModel::Kernel && Offset >= 0)
5533 return true;
5534
5535 return false;
5536}
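// [Editorial sketch - not part of X86ISelLowering.cpp] Sample inputs for the
// checks above (offsets chosen for illustration):
//   Small  model, symbolic disp, Offset =  8*1024*1024  -> true  (< 16MB)
//   Small  model, symbolic disp, Offset = 32*1024*1024  -> false
//   Kernel model, symbolic disp, Offset = -8             -> false (negative)
//   Any    model, no symbolic disp, any int32 Offset     -> true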
5537
5538/// Determines whether the callee is required to pop its own arguments.
5539/// Callee pop is necessary to support tail calls.
5540bool X86::isCalleePop(CallingConv::ID CallingConv,
5541 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
5542 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
5543 // can guarantee TCO.
5544 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
5545 return true;
5546
5547 switch (CallingConv) {
5548 default:
5549 return false;
5550 case CallingConv::X86_StdCall:
5551 case CallingConv::X86_FastCall:
5552 case CallingConv::X86_ThisCall:
5553 case CallingConv::X86_VectorCall:
5554 return !is64Bit;
5555 }
5556}
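// [Editorial sketch - not part of X86ISelLowering.cpp] What "callee pop" means
// at the instruction level: a 32-bit stdcall function taking two 4-byte
// arguments returns with `ret $8`, popping its own argument area - the property
// the comment above describes as necessary to support tail calls.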
5557
5558 /// Return true if the condition is a signed comparison operation.
5559static bool isX86CCSigned(unsigned X86CC) {
5560 switch (X86CC) {
5561 default:
5562    llvm_unreachable("Invalid integer condition!");
5563 case X86::COND_E:
5564 case X86::COND_NE:
5565 case X86::COND_B:
5566 case X86::COND_A:
5567 case X86::COND_BE:
5568 case X86::COND_AE:
5569 return false;
5570 case X86::COND_G:
5571 case X86::COND_GE:
5572 case X86::COND_L:
5573 case X86::COND_LE:
5574 return true;
5575 }
5576}
5577
5578static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
5579 switch (SetCCOpcode) {
5580  default: llvm_unreachable("Invalid integer condition!");
5581 case ISD::SETEQ: return X86::COND_E;
5582 case ISD::SETGT: return X86::COND_G;
5583 case ISD::SETGE: return X86::COND_GE;
5584 case ISD::SETLT: return X86::COND_L;
5585 case ISD::SETLE: return X86::COND_LE;
5586 case ISD::SETNE: return X86::COND_NE;
5587 case ISD::SETULT: return X86::COND_B;
5588 case ISD::SETUGT: return X86::COND_A;
5589 case ISD::SETULE: return X86::COND_BE;
5590 case ISD::SETUGE: return X86::COND_AE;
5591 }
5592}
5593
5594/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
5595/// condition code, returning the condition code and the LHS/RHS of the
5596/// comparison to make.
5597static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
5598 bool isFP, SDValue &LHS, SDValue &RHS,
5599 SelectionDAG &DAG) {
5600 if (!isFP) {
5601 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5602 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
5603 // X > -1 -> X == 0, jump !sign.
5604 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5605 return X86::COND_NS;
5606 }
5607 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
5608 // X < 0 -> X == 0, jump on sign.
5609 return X86::COND_S;
5610 }
5611 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
5612 // X >= 0 -> X == 0, jump on !sign.
5613 return X86::COND_NS;
5614 }
5615 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5616 // X < 1 -> X <= 0
5617 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5618 return X86::COND_LE;
5619 }
5620 }
5621
5622 return TranslateIntegerX86CC(SetCCOpcode);
5623 }
5624
5625 // First determine if it is required or is profitable to flip the operands.
5626
5627 // If LHS is a foldable load, but RHS is not, flip the condition.
5628 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5629 !ISD::isNON_EXTLoad(RHS.getNode())) {
5630 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5631 std::swap(LHS, RHS);
5632 }
5633
5634 switch (SetCCOpcode) {
5635 default: break;
5636 case ISD::SETOLT:
5637 case ISD::SETOLE:
5638 case ISD::SETUGT:
5639 case ISD::SETUGE:
5640 std::swap(LHS, RHS);
5641 break;
5642 }
5643
5644 // On a floating point condition, the flags are set as follows:
5645 // ZF PF CF op
5646 // 0 | 0 | 0 | X > Y
5647 // 0 | 0 | 1 | X < Y
5648 // 1 | 0 | 0 | X == Y
5649 // 1 | 1 | 1 | unordered
5650 switch (SetCCOpcode) {
5651  default: llvm_unreachable("Condcode should be pre-legalized away");
5652 case ISD::SETUEQ:
5653 case ISD::SETEQ: return X86::COND_E;
5654 case ISD::SETOLT: // flipped
5655 case ISD::SETOGT:
5656 case ISD::SETGT: return X86::COND_A;
5657 case ISD::SETOLE: // flipped
5658 case ISD::SETOGE:
5659 case ISD::SETGE: return X86::COND_AE;
5660 case ISD::SETUGT: // flipped
5661 case ISD::SETULT:
5662 case ISD::SETLT: return X86::COND_B;
5663 case ISD::SETUGE: // flipped
5664 case ISD::SETULE:
5665 case ISD::SETLE: return X86::COND_BE;
5666 case ISD::SETONE:
5667 case ISD::SETNE: return X86::COND_NE;
5668 case ISD::SETUO: return X86::COND_P;
5669 case ISD::SETO: return X86::COND_NP;
5670 case ISD::SETOEQ:
5671 case ISD::SETUNE: return X86::COND_INVALID;
5672 }
5673}
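// [Editorial sketch - not part of X86ISelLowering.cpp] Reading the FP flag
// table above: a double compare `ucomisd %xmm1, %xmm0` (x in xmm0, y in xmm1)
// sets ZF/PF/CF as shown, so:
//   x > y   ->  CF == 0 && ZF == 0  ->  X86::COND_A  (ja / seta / cmova)
//   x >= y  ->  CF == 0             ->  X86::COND_AE
//   unordered (either operand NaN)  ->  PF == 1      ->  X86::COND_P
// which is exactly the unsigned-style mapping the switch returns.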
5674
5675/// Is there a floating point cmov for the specific X86 condition code?
5676 /// The current x86 ISA includes the following FP cmov instructions:
5677 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5678static bool hasFPCMov(unsigned X86CC) {
5679 switch (X86CC) {
5680 default:
5681 return false;
5682 case X86::COND_B:
5683 case X86::COND_BE:
5684 case X86::COND_E:
5685 case X86::COND_P:
5686 case X86::COND_A:
5687 case X86::COND_AE:
5688 case X86::COND_NE:
5689 case X86::COND_NP:
5690 return true;
5691 }
5692}
5693
5694static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
5695 return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
5696 VT.is512BitVector();
5697}
5698
5699bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5700 const CallInst &I,
5701 MachineFunction &MF,
5702 unsigned Intrinsic) const {
5703 Info.flags = MachineMemOperand::MONone;
5704 Info.offset = 0;
5705
5706 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5707 if (!IntrData) {
5708 switch (Intrinsic) {
5709 case Intrinsic::x86_aesenc128kl:
5710 case Intrinsic::x86_aesdec128kl:
5711 Info.opc = ISD::INTRINSIC_W_CHAIN;
5712 Info.ptrVal = I.getArgOperand(1);
5713 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5714 Info.align = Align(1);
5715 Info.flags |= MachineMemOperand::MOLoad;
5716 return true;
5717 case Intrinsic::x86_aesenc256kl:
5718 case Intrinsic::x86_aesdec256kl:
5719 Info.opc = ISD::INTRINSIC_W_CHAIN;
5720 Info.ptrVal = I.getArgOperand(1);
5721 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5722 Info.align = Align(1);
5723 Info.flags |= MachineMemOperand::MOLoad;
5724 return true;
5725 case Intrinsic::x86_aesencwide128kl:
5726 case Intrinsic::x86_aesdecwide128kl:
5727 Info.opc = ISD::INTRINSIC_W_CHAIN;
5728 Info.ptrVal = I.getArgOperand(0);
5729 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5730 Info.align = Align(1);
5731 Info.flags |= MachineMemOperand::MOLoad;
5732 return true;
5733 case Intrinsic::x86_aesencwide256kl:
5734 case Intrinsic::x86_aesdecwide256kl:
5735 Info.opc = ISD::INTRINSIC_W_CHAIN;
5736 Info.ptrVal = I.getArgOperand(0);
5737 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5738 Info.align = Align(1);
5739 Info.flags |= MachineMemOperand::MOLoad;
5740 return true;
5741 case Intrinsic::x86_cmpccxadd32:
5742 case Intrinsic::x86_cmpccxadd64:
5743 case Intrinsic::x86_atomic_bts:
5744 case Intrinsic::x86_atomic_btc:
5745 case Intrinsic::x86_atomic_btr: {
5746 Info.opc = ISD::INTRINSIC_W_CHAIN;
5747 Info.ptrVal = I.getArgOperand(0);
5748 unsigned Size = I.getType()->getScalarSizeInBits();
5749 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5750 Info.align = Align(Size);
5751 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5752 MachineMemOperand::MOVolatile;
5753 return true;
5754 }
5755 case Intrinsic::x86_atomic_bts_rm:
5756 case Intrinsic::x86_atomic_btc_rm:
5757 case Intrinsic::x86_atomic_btr_rm: {
5758 Info.opc = ISD::INTRINSIC_W_CHAIN;
5759 Info.ptrVal = I.getArgOperand(0);
5760 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
5761 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5762 Info.align = Align(Size);
5763 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5764 MachineMemOperand::MOVolatile;
5765 return true;
5766 }
5767 case Intrinsic::x86_aadd32:
5768 case Intrinsic::x86_aadd64:
5769 case Intrinsic::x86_aand32:
5770 case Intrinsic::x86_aand64:
5771 case Intrinsic::x86_aor32:
5772 case Intrinsic::x86_aor64:
5773 case Intrinsic::x86_axor32:
5774 case Intrinsic::x86_axor64:
5775 case Intrinsic::x86_atomic_add_cc:
5776 case Intrinsic::x86_atomic_sub_cc:
5777 case Intrinsic::x86_atomic_or_cc:
5778 case Intrinsic::x86_atomic_and_cc:
5779 case Intrinsic::x86_atomic_xor_cc: {
5780 Info.opc = ISD::INTRINSIC_W_CHAIN;
5781 Info.ptrVal = I.getArgOperand(0);
5782 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
5783 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5784 Info.align = Align(Size);
5785 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5786 MachineMemOperand::MOVolatile;
5787 return true;
5788 }
5789 }
5790 return false;
5791 }
5792
5793 switch (IntrData->Type) {
5794 case TRUNCATE_TO_MEM_VI8:
5795 case TRUNCATE_TO_MEM_VI16:
5796 case TRUNCATE_TO_MEM_VI32: {
5797 Info.opc = ISD::INTRINSIC_VOID;
5798 Info.ptrVal = I.getArgOperand(0);
5799 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5800 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5801 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5802 ScalarVT = MVT::i8;
5803 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5804 ScalarVT = MVT::i16;
5805 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5806 ScalarVT = MVT::i32;
5807
5808 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5809 Info.align = Align(1);
5810 Info.flags |= MachineMemOperand::MOStore;
5811 break;
5812 }
5813 case GATHER:
5814 case GATHER_AVX2: {
5815 Info.opc = ISD::INTRINSIC_W_CHAIN;
5816 Info.ptrVal = nullptr;
5817 MVT DataVT = MVT::getVT(I.getType());
5818 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5819 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5820 IndexVT.getVectorNumElements());
5821 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5822 Info.align = Align(1);
5823 Info.flags |= MachineMemOperand::MOLoad;
5824 break;
5825 }
5826 case SCATTER: {
5827 Info.opc = ISD::INTRINSIC_VOID;
5828 Info.ptrVal = nullptr;
5829 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5830 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5831 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5832 IndexVT.getVectorNumElements());
5833 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5834 Info.align = Align(1);
5835 Info.flags |= MachineMemOperand::MOStore;
5836 break;
5837 }
5838 default:
5839 return false;
5840 }
5841
5842 return true;
5843}
5844
5845/// Returns true if the target can instruction select the
5846/// specified FP immediate natively. If false, the legalizer will
5847/// materialize the FP immediate as a load from a constant pool.
5848bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5849 bool ForCodeSize) const {
5850 for (const APFloat &FPImm : LegalFPImmediates)
5851 if (Imm.bitwiseIsEqual(FPImm))
5852 return true;
5853 return false;
5854}
5855
5856bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5857 ISD::LoadExtType ExtTy,
5858 EVT NewVT) const {
5859  assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
5860
5861 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5862 // relocation target a movq or addq instruction: don't let the load shrink.
5863 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5864 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5865 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5866 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5867
5868  // If this is (1) an AVX vector load with (2) multiple uses and (3) all of
5869  // those uses are extracted directly into stores, then each extract + store
5870  // can be store-folded. Therefore, it's probably not worth splitting the load.
5871 EVT VT = Load->getValueType(0);
5872 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5873 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5874 // Skip uses of the chain value. Result 0 of the node is the load value.
5875 if (UI.getUse().getResNo() != 0)
5876 continue;
5877
5878 // If this use is not an extract + store, it's probably worth splitting.
5879 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5880 UI->use_begin()->getOpcode() != ISD::STORE)
5881 return true;
5882 }
5883 // All non-chain uses are extract + store.
5884 return false;
5885 }
5886
5887 return true;
5888}
5889
5890/// Returns true if it is beneficial to convert a load of a constant
5891/// to just the constant itself.
5892bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5893 Type *Ty) const {
5894  assert(Ty->isIntegerTy());
5895
5896 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5897 if (BitSize == 0 || BitSize > 64)
5898 return false;
5899 return true;
5900}
5901
5902bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5903 // If we are using XMM registers in the ABI and the condition of the select is
5904 // a floating-point compare and we have blendv or conditional move, then it is
5905 // cheaper to select instead of doing a cross-register move and creating a
5906 // load that depends on the compare result.
5907 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5908 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5909}
5910
5911bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5912 // TODO: It might be a win to ease or lift this restriction, but the generic
5913 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5914 if (VT.isVector() && Subtarget.hasAVX512())
5915 return false;
5916
5917 return true;
5918}
5919
5920bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5921 SDValue C) const {
5922 // TODO: We handle scalars using custom code, but generic combining could make
5923 // that unnecessary.
5924 APInt MulC;
5925 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5926 return false;
5927
5929  // Find the type this will be legalized to. Otherwise we might prematurely
5929 // convert this to shl+add/sub and then still have to type legalize those ops.
5930 // Another choice would be to defer the decision for illegal types until
5931 // after type legalization. But constant splat vectors of i64 can't make it
5932 // through type legalization on 32-bit targets so we would need to special
5933 // case vXi64.
5934 while (getTypeAction(Context, VT) != TypeLegal)
5935 VT = getTypeToTransformTo(Context, VT);
5936
5937 // If vector multiply is legal, assume that's faster than shl + add/sub.
5938 // Multiply is a complex op with higher latency and lower throughput in
5939 // most implementations, sub-vXi32 vector multiplies are always fast,
5940 // vXi32 mustn't have a SlowMULLD implementation, and anything larger (vXi64)
5941 // is always going to be slow.
5942 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5943 if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&
5944 (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
5945 return false;
5946
5947 // shl+add, shl+sub, shl+add+neg
5948 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5949 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5950}
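// [Editorial sketch - not part of X86ISelLowering.cpp] The power-of-two tests
// above correspond to these decompositions (constants chosen for illustration):
//   MulC ==  9: MulC - 1    is a power of 2  ->  x*9  == (x << 3) + x
//   MulC ==  7: MulC + 1    is a power of 2  ->  x*7  == (x << 3) - x
//   MulC == -7: 1 - MulC    is a power of 2  ->  x*-7 == x - (x << 3)
//   MulC == -9: -(MulC + 1) is a power of 2  ->  x*-9 == -((x << 3) + x)
static_assert(5 * 9 == (5 << 3) + 5, "shl+add");
static_assert(5 * 7 == (5 << 3) - 5, "shl+sub");
static_assert(5 * -7 == 5 - (5 << 3), "shl+sub, operands swapped");
static_assert(5 * -9 == -((5 << 3) + 5), "shl+add+neg");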
5951
5952bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5953 unsigned Index) const {
5954 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5955 return false;
5956
5957 // Mask vectors support all subregister combinations and operations that
5958 // extract half of vector.
5959 if (ResVT.getVectorElementType() == MVT::i1)
5960 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&