Bug Summary

File:build/source/llvm/lib/Target/X86/X86ISelLowering.cpp
Warning:line 17608, column 31
Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm -resource-dir /usr/lib/llvm-17/lib/clang/17 -I lib/Target/X86 -I /build/source/llvm/lib/Target/X86 -I include -I /build/source/llvm/include -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include 
-fmacro-prefix-map=/build/source/build-llvm=build-llvm -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm=build-llvm -fcoverage-prefix-map=/build/source/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-04-16-131055-16441-1 -x c++ /build/source/llvm/lib/Target/X86/X86ISelLowering.cpp
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/ObjCARCUtil.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/IntrinsicLowering.h"
34#include "llvm/CodeGen/MachineFrameInfo.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineJumpTableInfo.h"
38#include "llvm/CodeGen/MachineLoopInfo.h"
39#include "llvm/CodeGen/MachineModuleInfo.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/TargetLowering.h"
42#include "llvm/CodeGen/WinEHFuncInfo.h"
43#include "llvm/IR/CallingConv.h"
44#include "llvm/IR/Constants.h"
45#include "llvm/IR/DerivedTypes.h"
46#include "llvm/IR/DiagnosticInfo.h"
47#include "llvm/IR/EHPersonalities.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalVariable.h"
51#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Instructions.h"
53#include "llvm/IR/Intrinsics.h"
54#include "llvm/IR/PatternMatch.h"
55#include "llvm/MC/MCAsmInfo.h"
56#include "llvm/MC/MCContext.h"
57#include "llvm/MC/MCExpr.h"
58#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/KnownBits.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Target/TargetOptions.h"
65#include <algorithm>
66#include <bitset>
67#include <cctype>
68#include <numeric>
69using namespace llvm;
70
// Debug/statistics group name for this file. The STATISTIC counter that
// follows expands with this string as its group name.
#define DEBUG_TYPE "x86-isel"
72
73STATISTIC(NumTailCalls, "Number of tail calls")static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls"
, "Number of tail calls"}
;
74
75static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
76 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
77 cl::desc(
78 "Sets the preferable loop alignment for experiments (as log2 bytes) "
79 "for innermost loops only. If specified, this option overrides "
80 "alignment set by x86-experimental-pref-loop-alignment."),
81 cl::Hidden);
82
83static cl::opt<bool> MulConstantOptimization(
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden);
88
89static cl::opt<bool> ExperimentalUnorderedISEL(
90 "x86-experimental-unordered-atomic-isel", cl::init(false),
91 cl::desc("Use LoadSDNode and StoreSDNode instead of "
92 "AtomicSDNode for unordered atomic loads and "
93 "stores respectively."),
94 cl::Hidden);
95
96/// Call this when the user attempts to do something unsupported, like
97/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
98/// report_fatal_error, so calling code should attempt to recover without
99/// crashing.
100static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
101 const char *Msg) {
102 MachineFunction &MF = DAG.getMachineFunction();
103 DAG.getContext()->diagnose(
104 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
105}
106
107/// Returns true if a CC can dynamically exclude a register from the list of
108/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
109/// the return registers.
110static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
111 switch (CC) {
112 default:
113 return false;
114 case CallingConv::X86_RegCall:
115 case CallingConv::PreserveMost:
116 case CallingConv::PreserveAll:
117 return true;
118 }
119}
120
121/// Returns true if a CC can dynamically exclude a register from the list of
122/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
123/// the parameters.
124static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
125 return CC == CallingConv::X86_RegCall;
126}
127
128X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
129 const X86Subtarget &STI)
130 : TargetLowering(TM), Subtarget(STI) {
131 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
132 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
133
134 // Set up the TargetLowering object.
135
136 // X86 is weird. It always uses i8 for shift amounts and setcc results.
137 setBooleanContents(ZeroOrOneBooleanContent);
138 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
139 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
140
141 // For 64-bit, since we have so many registers, use the ILP scheduler.
142 // For 32-bit, use the register pressure specific scheduling.
143 // For Atom, always use ILP scheduling.
144 if (Subtarget.isAtom())
145 setSchedulingPreference(Sched::ILP);
146 else if (Subtarget.is64Bit())
147 setSchedulingPreference(Sched::ILP);
148 else
149 setSchedulingPreference(Sched::RegPressure);
150 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
151 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
152
153 // Bypass expensive divides and use cheaper ones.
154 if (TM.getOptLevel() >= CodeGenOpt::Default) {
155 if (Subtarget.hasSlowDivide32())
156 addBypassSlowDiv(32, 8);
157 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
158 addBypassSlowDiv(64, 32);
159 }
160
161 // Setup Windows compiler runtime calls.
162 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
163 static const struct {
164 const RTLIB::Libcall Op;
165 const char * const Name;
166 const CallingConv::ID CC;
167 } LibraryCalls[] = {
168 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
169 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
170 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
171 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
172 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
173 };
174
175 for (const auto &LC : LibraryCalls) {
176 setLibcallName(LC.Op, LC.Name);
177 setLibcallCallingConv(LC.Op, LC.CC);
178 }
179 }
180
181 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
182 // MSVCRT doesn't have powi; fall back to pow
183 setLibcallName(RTLIB::POWI_F32, nullptr);
184 setLibcallName(RTLIB::POWI_F64, nullptr);
185 }
186
187 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
188 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
189 // FIXME: Should we be limiting the atomic size on other configs? Default is
190 // 1024.
191 if (!Subtarget.canUseCMPXCHG8B())
192 setMaxAtomicSizeInBitsSupported(32);
193
194 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
195
196 setMaxLargeFPConvertBitWidthSupported(128);
197
198 // Set up the register classes.
199 addRegisterClass(MVT::i8, &X86::GR8RegClass);
200 addRegisterClass(MVT::i16, &X86::GR16RegClass);
201 addRegisterClass(MVT::i32, &X86::GR32RegClass);
202 if (Subtarget.is64Bit())
203 addRegisterClass(MVT::i64, &X86::GR64RegClass);
204
205 for (MVT VT : MVT::integer_valuetypes())
206 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
207
208 // We don't accept any truncstore of integer registers.
209 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
210 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
211 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
212 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
213 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
214 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
215
216 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
217
218 // SETOEQ and SETUNE require checking two conditions.
219 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
220 setCondCodeAction(ISD::SETOEQ, VT, Expand);
221 setCondCodeAction(ISD::SETUNE, VT, Expand);
222 }
223
224 // Integer absolute.
225 if (Subtarget.canUseCMOV()) {
226 setOperationAction(ISD::ABS , MVT::i16 , Custom);
227 setOperationAction(ISD::ABS , MVT::i32 , Custom);
228 if (Subtarget.is64Bit())
229 setOperationAction(ISD::ABS , MVT::i64 , Custom);
230 }
231
232 // Signed saturation subtraction.
233 setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
234 setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
235 setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
236 if (Subtarget.is64Bit())
237 setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
238
239 // Funnel shifts.
240 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
241 // For slow shld targets we only lower for code size.
242 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
243
244 setOperationAction(ShiftOp , MVT::i8 , Custom);
245 setOperationAction(ShiftOp , MVT::i16 , Custom);
246 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
247 if (Subtarget.is64Bit())
248 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
249 }
250
251 if (!Subtarget.useSoftFloat()) {
252 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
253 // operation.
254 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
255 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
256 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
257 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
258 // We have an algorithm for SSE2, and we turn this into a 64-bit
259 // FILD or VCVTUSI2SS/SD for other targets.
260 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
261 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
262 // We have an algorithm for SSE2->double, and we turn this into a
263 // 64-bit FILD followed by conditional FADD for other targets.
264 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
265 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
266
267 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
268 // this operation.
269 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
270 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
271 // SSE has no i16 to fp conversion, only i32. We promote in the handler
272 // to allow f80 to use i16 and f64 to use i16 with sse1 only
273 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
274 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
275 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
276 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
277 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
278 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
279 // are Legal, f80 is custom lowered.
280 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
281 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
282
283 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
284 // this operation.
285 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
286 // FIXME: This doesn't generate invalid exception when it should. PR44019.
287 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
288 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
289 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
290 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
291 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
292 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
293 // are Legal, f80 is custom lowered.
294 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
295 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
296
297 // Handle FP_TO_UINT by promoting the destination to a larger signed
298 // conversion.
299 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
300 // FIXME: This doesn't generate invalid exception when it should. PR44019.
301 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
302 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
303 // FIXME: This doesn't generate invalid exception when it should. PR44019.
304 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
305 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
306 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
307 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
308 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
309
310 setOperationAction(ISD::LRINT, MVT::f32, Custom);
311 setOperationAction(ISD::LRINT, MVT::f64, Custom);
312 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
313 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
314
315 if (!Subtarget.is64Bit()) {
316 setOperationAction(ISD::LRINT, MVT::i64, Custom);
317 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
318 }
319 }
320
321 if (Subtarget.hasSSE2()) {
322 // Custom lowering for saturating float to int conversions.
323 // We handle promotion to larger result types manually.
324 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
325 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
326 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
327 }
328 if (Subtarget.is64Bit()) {
329 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
330 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
331 }
332 }
333
334 // Handle address space casts between mixed sized pointers.
335 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
336 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
337
338 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
339 if (!Subtarget.hasSSE2()) {
340 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
341 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
342 if (Subtarget.is64Bit()) {
343 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
344 // Without SSE, i64->f64 goes through memory.
345 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
346 }
347 } else if (!Subtarget.is64Bit())
348 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
349
350 // Scalar integer divide and remainder are lowered to use operations that
351 // produce two results, to match the available instructions. This exposes
352 // the two-result form to trivial CSE, which is able to combine x/y and x%y
353 // into a single instruction.
354 //
355 // Scalar integer multiply-high is also lowered to use two-result
356 // operations, to match the available instructions. However, plain multiply
357 // (low) operations are left as Legal, as there are single-result
358 // instructions for this in x86. Using the two-result multiply instructions
359 // when both high and low results are needed must be arranged by dagcombine.
360 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
361 setOperationAction(ISD::MULHS, VT, Expand);
362 setOperationAction(ISD::MULHU, VT, Expand);
363 setOperationAction(ISD::SDIV, VT, Expand);
364 setOperationAction(ISD::UDIV, VT, Expand);
365 setOperationAction(ISD::SREM, VT, Expand);
366 setOperationAction(ISD::UREM, VT, Expand);
367 }
368
369 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
370 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
371 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
372 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
373 setOperationAction(ISD::BR_CC, VT, Expand);
374 setOperationAction(ISD::SELECT_CC, VT, Expand);
375 }
376 if (Subtarget.is64Bit())
377 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
378 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
379 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
380 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
381
382 setOperationAction(ISD::FREM , MVT::f32 , Expand);
383 setOperationAction(ISD::FREM , MVT::f64 , Expand);
384 setOperationAction(ISD::FREM , MVT::f80 , Expand);
385 setOperationAction(ISD::FREM , MVT::f128 , Expand);
386
387 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
388 setOperationAction(ISD::GET_ROUNDING , MVT::i32 , Custom);
389 setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
390 }
391
392 // Promote the i8 variants and force them on up to i32 which has a shorter
393 // encoding.
394 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
395 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
396 // Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
397 // a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
398 // promote that too.
399 setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
400 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32);
401
402 if (!Subtarget.hasBMI()) {
403 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
404 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
405 if (Subtarget.is64Bit()) {
406 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
407 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
408 }
409 }
410
411 if (Subtarget.hasLZCNT()) {
412 // When promoting the i8 variants, force them to i32 for a shorter
413 // encoding.
414 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
415 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
416 } else {
417 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
418 if (VT == MVT::i64 && !Subtarget.is64Bit())
419 continue;
420 setOperationAction(ISD::CTLZ , VT, Custom);
421 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
422 }
423 }
424
425 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
426 ISD::STRICT_FP_TO_FP16}) {
427 // Special handling for half-precision floating point conversions.
428 // If we don't have F16C support, then lower half float conversions
429 // into library calls.
430 setOperationAction(
431 Op, MVT::f32,
432 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
433 // There's never any support for operations beyond MVT::f32.
434 setOperationAction(Op, MVT::f64, Expand);
435 setOperationAction(Op, MVT::f80, Expand);
436 setOperationAction(Op, MVT::f128, Expand);
437 }
438
439 for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
440 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
441 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
442 setTruncStoreAction(VT, MVT::f16, Expand);
443 setTruncStoreAction(VT, MVT::bf16, Expand);
444
445 setOperationAction(ISD::BF16_TO_FP, VT, Expand);
446 setOperationAction(ISD::FP_TO_BF16, VT, Custom);
447 }
448
449 setOperationAction(ISD::PARITY, MVT::i8, Custom);
450 setOperationAction(ISD::PARITY, MVT::i16, Custom);
451 setOperationAction(ISD::PARITY, MVT::i32, Custom);
452 if (Subtarget.is64Bit())
453 setOperationAction(ISD::PARITY, MVT::i64, Custom);
454 if (Subtarget.hasPOPCNT()) {
455 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
456 // popcntw is longer to encode than popcntl and also has a false dependency
457 // on the dest that popcntl hasn't had since Cannon Lake.
458 setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
459 } else {
460 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
461 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
462 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
463 if (Subtarget.is64Bit())
464 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
465 else
466 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
467 }
468
469 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
470
471 if (!Subtarget.hasMOVBE())
472 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
473
474 // X86 wants to expand cmov itself.
475 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
476 setOperationAction(ISD::SELECT, VT, Custom);
477 setOperationAction(ISD::SETCC, VT, Custom);
478 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
479 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
480 }
481 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
482 if (VT == MVT::i64 && !Subtarget.is64Bit())
483 continue;
484 setOperationAction(ISD::SELECT, VT, Custom);
485 setOperationAction(ISD::SETCC, VT, Custom);
486 }
487
488 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
489 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
490 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
491
492 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
493 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
494 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
495 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
496 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
497 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
498 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
499 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
500
501 // Darwin ABI issue.
502 for (auto VT : { MVT::i32, MVT::i64 }) {
503 if (VT == MVT::i64 && !Subtarget.is64Bit())
504 continue;
505 setOperationAction(ISD::ConstantPool , VT, Custom);
506 setOperationAction(ISD::JumpTable , VT, Custom);
507 setOperationAction(ISD::GlobalAddress , VT, Custom);
508 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
509 setOperationAction(ISD::ExternalSymbol , VT, Custom);
510 setOperationAction(ISD::BlockAddress , VT, Custom);
511 }
512
513 // 64-bit shl, sra, srl (iff 32-bit x86)
514 for (auto VT : { MVT::i32, MVT::i64 }) {
515 if (VT == MVT::i64 && !Subtarget.is64Bit())
516 continue;
517 setOperationAction(ISD::SHL_PARTS, VT, Custom);
518 setOperationAction(ISD::SRA_PARTS, VT, Custom);
519 setOperationAction(ISD::SRL_PARTS, VT, Custom);
520 }
521
522 if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
523 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
524
525 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
526
527 // Expand certain atomics
528 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
529 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
530 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
531 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
532 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
533 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
534 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
535 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
536 }
537
538 if (!Subtarget.is64Bit())
539 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
540
541 if (Subtarget.canUseCMPXCHG16B())
542 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
543
544 // FIXME - use subtarget debug flags
545 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
546 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
547 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
548 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
549 }
550
551 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
552 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
553
554 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
555 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
556
557 setOperationAction(ISD::TRAP, MVT::Other, Legal);
558 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
559 if (Subtarget.isTargetPS())
560 setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
561 else
562 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
563
564 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
565 setOperationAction(ISD::VASTART , MVT::Other, Custom);
566 setOperationAction(ISD::VAEND , MVT::Other, Expand);
567 bool Is64Bit = Subtarget.is64Bit();
568 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
569 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
570
571 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
572 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
573
574 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
575
576 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
577 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
578 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
579
580 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
581
582 auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
583 setOperationAction(ISD::FABS, VT, Action);
584 setOperationAction(ISD::FNEG, VT, Action);
585 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
586 setOperationAction(ISD::FREM, VT, Action);
587 setOperationAction(ISD::FMA, VT, Action);
588 setOperationAction(ISD::FMINNUM, VT, Action);
589 setOperationAction(ISD::FMAXNUM, VT, Action);
590 setOperationAction(ISD::FMINIMUM, VT, Action);
591 setOperationAction(ISD::FMAXIMUM, VT, Action);
592 setOperationAction(ISD::FSIN, VT, Action);
593 setOperationAction(ISD::FCOS, VT, Action);
594 setOperationAction(ISD::FSINCOS, VT, Action);
595 setOperationAction(ISD::FSQRT, VT, Action);
596 setOperationAction(ISD::FPOW, VT, Action);
597 setOperationAction(ISD::FLOG, VT, Action);
598 setOperationAction(ISD::FLOG2, VT, Action);
599 setOperationAction(ISD::FLOG10, VT, Action);
600 setOperationAction(ISD::FEXP, VT, Action);
601 setOperationAction(ISD::FEXP2, VT, Action);
602 setOperationAction(ISD::FCEIL, VT, Action);
603 setOperationAction(ISD::FFLOOR, VT, Action);
604 setOperationAction(ISD::FNEARBYINT, VT, Action);
605 setOperationAction(ISD::FRINT, VT, Action);
606 setOperationAction(ISD::BR_CC, VT, Action);
607 setOperationAction(ISD::SETCC, VT, Action);
608 setOperationAction(ISD::SELECT, VT, Custom);
609 setOperationAction(ISD::SELECT_CC, VT, Action);
610 setOperationAction(ISD::FROUND, VT, Action);
611 setOperationAction(ISD::FROUNDEVEN, VT, Action);
612 setOperationAction(ISD::FTRUNC, VT, Action);
613 };
614
615 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
616 // f16, f32 and f64 use SSE.
617 // Set up the FP register classes.
618 addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
619 : &X86::FR16RegClass);
620 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
621 : &X86::FR32RegClass);
622 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
623 : &X86::FR64RegClass);
624
625 // Disable f32->f64 extload as we can only generate this in one instruction
626 // under optsize. So its easier to pattern match (fpext (load)) for that
627 // case instead of needing to emit 2 instructions for extload in the
628 // non-optsize case.
629 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
630
631 for (auto VT : { MVT::f32, MVT::f64 }) {
632 // Use ANDPD to simulate FABS.
633 setOperationAction(ISD::FABS, VT, Custom);
634
635 // Use XORP to simulate FNEG.
636 setOperationAction(ISD::FNEG, VT, Custom);
637
638 // Use ANDPD and ORPD to simulate FCOPYSIGN.
639 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
640
641 // These might be better off as horizontal vector ops.
642 setOperationAction(ISD::FADD, VT, Custom);
643 setOperationAction(ISD::FSUB, VT, Custom);
644
645 // We don't support sin/cos/fmod
646 setOperationAction(ISD::FSIN , VT, Expand);
647 setOperationAction(ISD::FCOS , VT, Expand);
648 setOperationAction(ISD::FSINCOS, VT, Expand);
649 }
650
651 // Half type will be promoted by default.
652 setF16Action(MVT::f16, Promote);
653 setOperationAction(ISD::FADD, MVT::f16, Promote);
654 setOperationAction(ISD::FSUB, MVT::f16, Promote);
655 setOperationAction(ISD::FMUL, MVT::f16, Promote);
656 setOperationAction(ISD::FDIV, MVT::f16, Promote);
657 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
658 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
659 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
660
661 setOperationAction(ISD::STRICT_FADD, MVT::f16, Promote);
662 setOperationAction(ISD::STRICT_FSUB, MVT::f16, Promote);
663 setOperationAction(ISD::STRICT_FMUL, MVT::f16, Promote);
664 setOperationAction(ISD::STRICT_FDIV, MVT::f16, Promote);
665 setOperationAction(ISD::STRICT_FMA, MVT::f16, Promote);
666 setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Promote);
667 setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Promote);
668 setOperationAction(ISD::STRICT_FMINIMUM, MVT::f16, Promote);
669 setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote);
670 setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote);
671 setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote);
672 setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote);
673 setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote);
674 setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote);
675 setOperationAction(ISD::STRICT_FEXP, MVT::f16, Promote);
676 setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote);
677 setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote);
678 setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote);
679 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote);
680 setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote);
681 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote);
682 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote);
683 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
684 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
685 setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
686 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
687 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
688 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
689
690 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
691 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
692
693 // Lower this to MOVMSK plus an AND.
694 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
695 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
696
697 } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
698 (UseX87 || Is64Bit)) {
699 // Use SSE for f32, x87 for f64.
700 // Set up the FP register classes.
701 addRegisterClass(MVT::f32, &X86::FR32RegClass);
702 if (UseX87)
703 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
704
705 // Use ANDPS to simulate FABS.
706 setOperationAction(ISD::FABS , MVT::f32, Custom);
707
708 // Use XORP to simulate FNEG.
709 setOperationAction(ISD::FNEG , MVT::f32, Custom);
710
711 if (UseX87)
712 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
713
714 // Use ANDPS and ORPS to simulate FCOPYSIGN.
715 if (UseX87)
716 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
717 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
718
719 // We don't support sin/cos/fmod
720 setOperationAction(ISD::FSIN , MVT::f32, Expand);
721 setOperationAction(ISD::FCOS , MVT::f32, Expand);
722 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
723
724 if (UseX87) {
725 // Always expand sin/cos functions even though x87 has an instruction.
726 setOperationAction(ISD::FSIN, MVT::f64, Expand);
727 setOperationAction(ISD::FCOS, MVT::f64, Expand);
728 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
729 }
730 } else if (UseX87) {
731 // f32 and f64 in x87.
732 // Set up the FP register classes.
733 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
734 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
735
736 for (auto VT : { MVT::f32, MVT::f64 }) {
737 setOperationAction(ISD::UNDEF, VT, Expand);
738 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
739
740 // Always expand sin/cos functions even though x87 has an instruction.
741 setOperationAction(ISD::FSIN , VT, Expand);
742 setOperationAction(ISD::FCOS , VT, Expand);
743 setOperationAction(ISD::FSINCOS, VT, Expand);
744 }
745 }
746
747 // Expand FP32 immediates into loads from the stack, save special cases.
748 if (isTypeLegal(MVT::f32)) {
749 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
750 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
751 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
752 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
753 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
754 } else // SSE immediates.
755 addLegalFPImmediate(APFloat(+0.0f)); // xorps
756 }
757 // Expand FP64 immediates into loads from the stack, save special cases.
758 if (isTypeLegal(MVT::f64)) {
759 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
760 addLegalFPImmediate(APFloat(+0.0)); // FLD0
761 addLegalFPImmediate(APFloat(+1.0)); // FLD1
762 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
763 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
764 } else // SSE immediates.
765 addLegalFPImmediate(APFloat(+0.0)); // xorpd
766 }
767 // Support fp16 0 immediate.
768 if (isTypeLegal(MVT::f16))
769 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
770
771 // Handle constrained floating-point operations of scalar.
772 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
773 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
774 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
775 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
776 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
777 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
778 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
779 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
780 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
781 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
782 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
783 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
784
785 // We don't support FMA.
786 setOperationAction(ISD::FMA, MVT::f64, Expand);
787 setOperationAction(ISD::FMA, MVT::f32, Expand);
788
789 // f80 always uses X87.
790 if (UseX87) {
791 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
792 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
793 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
794 {
795 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
796 addLegalFPImmediate(TmpFlt); // FLD0
797 TmpFlt.changeSign();
798 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
799
800 bool ignored;
801 APFloat TmpFlt2(+1.0);
802 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
803 &ignored);
804 addLegalFPImmediate(TmpFlt2); // FLD1
805 TmpFlt2.changeSign();
806 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
807 }
808
809 // Always expand sin/cos functions even though x87 has an instruction.
810 setOperationAction(ISD::FSIN , MVT::f80, Expand);
811 setOperationAction(ISD::FCOS , MVT::f80, Expand);
812 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
813
814 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
815 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
816 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
817 setOperationAction(ISD::FRINT, MVT::f80, Expand);
818 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
819 setOperationAction(ISD::FMA, MVT::f80, Expand);
820 setOperationAction(ISD::LROUND, MVT::f80, Expand);
821 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
822 setOperationAction(ISD::LRINT, MVT::f80, Custom);
823 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
824
825 // Handle constrained floating-point operations of scalar.
826 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
827 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
828 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
829 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
830 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
831 if (isTypeLegal(MVT::f16)) {
832 setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
833 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
834 } else {
835 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
836 }
837 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
838 // as Custom.
839 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
840 }
841
842 // f128 uses xmm registers, but most operations require libcalls.
843 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
844 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
845 : &X86::VR128RegClass);
846
847 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
848
849 setOperationAction(ISD::FADD, MVT::f128, LibCall);
850 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
851 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
852 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
853 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
854 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
855 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
856 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
857 setOperationAction(ISD::FMA, MVT::f128, LibCall);
858 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
859
860 setOperationAction(ISD::FABS, MVT::f128, Custom);
861 setOperationAction(ISD::FNEG, MVT::f128, Custom);
862 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
863
864 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
865 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
866 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
867 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
868 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
869 // No STRICT_FSINCOS
870 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
871 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
872
873 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
874 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
875 // We need to custom handle any FP_ROUND with an f128 input, but
876 // LegalizeDAG uses the result type to know when to run a custom handler.
877 // So we have to list all legal floating point result types here.
878 if (isTypeLegal(MVT::f32)) {
879 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
880 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
881 }
882 if (isTypeLegal(MVT::f64)) {
883 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
884 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
885 }
886 if (isTypeLegal(MVT::f80)) {
887 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
888 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
889 }
890
891 setOperationAction(ISD::SETCC, MVT::f128, Custom);
892
893 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
894 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
895 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
896 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
897 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
898 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
899 }
900
901 // Always use a library call for pow.
902 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
903 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
904 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
905 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
906
907 setOperationAction(ISD::FLOG, MVT::f80, Expand);
908 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
909 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
910 setOperationAction(ISD::FEXP, MVT::f80, Expand);
911 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
912 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
913 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
914
915 // Some FP actions are always expanded for vector types.
916 for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
917 MVT::v4f32, MVT::v8f32, MVT::v16f32,
918 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
919 setOperationAction(ISD::FSIN, VT, Expand);
920 setOperationAction(ISD::FSINCOS, VT, Expand);
921 setOperationAction(ISD::FCOS, VT, Expand);
922 setOperationAction(ISD::FREM, VT, Expand);
923 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
924 setOperationAction(ISD::FPOW, VT, Expand);
925 setOperationAction(ISD::FLOG, VT, Expand);
926 setOperationAction(ISD::FLOG2, VT, Expand);
927 setOperationAction(ISD::FLOG10, VT, Expand);
928 setOperationAction(ISD::FEXP, VT, Expand);
929 setOperationAction(ISD::FEXP2, VT, Expand);
930 }
931
932 // First set operation action for all vector types to either promote
933 // (for widening) or expand (for scalarization). Then we will selectively
934 // turn on ones that can be effectively codegen'd.
935 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
936 setOperationAction(ISD::SDIV, VT, Expand);
937 setOperationAction(ISD::UDIV, VT, Expand);
938 setOperationAction(ISD::SREM, VT, Expand);
939 setOperationAction(ISD::UREM, VT, Expand);
940 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
941 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
942 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
943 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
944 setOperationAction(ISD::FMA, VT, Expand);
945 setOperationAction(ISD::FFLOOR, VT, Expand);
946 setOperationAction(ISD::FCEIL, VT, Expand);
947 setOperationAction(ISD::FTRUNC, VT, Expand);
948 setOperationAction(ISD::FRINT, VT, Expand);
949 setOperationAction(ISD::FNEARBYINT, VT, Expand);
950 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
951 setOperationAction(ISD::MULHS, VT, Expand);
952 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
953 setOperationAction(ISD::MULHU, VT, Expand);
954 setOperationAction(ISD::SDIVREM, VT, Expand);
955 setOperationAction(ISD::UDIVREM, VT, Expand);
956 setOperationAction(ISD::CTPOP, VT, Expand);
957 setOperationAction(ISD::CTTZ, VT, Expand);
958 setOperationAction(ISD::CTLZ, VT, Expand);
959 setOperationAction(ISD::ROTL, VT, Expand);
960 setOperationAction(ISD::ROTR, VT, Expand);
961 setOperationAction(ISD::BSWAP, VT, Expand);
962 setOperationAction(ISD::SETCC, VT, Expand);
963 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
964 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
965 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
966 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
967 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
968 setOperationAction(ISD::TRUNCATE, VT, Expand);
969 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
970 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
971 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
972 setOperationAction(ISD::SELECT_CC, VT, Expand);
973 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
974 setTruncStoreAction(InnerVT, VT, Expand);
975
976 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
977 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
978
979 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
980 // types, we have to deal with them whether we ask for Expansion or not.
981 // Setting Expand causes its own optimisation problems though, so leave
982 // them legal.
983 if (VT.getVectorElementType() == MVT::i1)
984 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
985
986 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
987 // split/scalarized right now.
988 if (VT.getVectorElementType() == MVT::f16 ||
989 VT.getVectorElementType() == MVT::bf16)
990 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
991 }
992 }
993
994 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
995 // with -msoft-float, disable use of MMX as well.
996 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
997 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
998 // No operations on x86mmx supported, everything uses intrinsics.
999 }
1000
1001 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
1002 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1003 : &X86::VR128RegClass);
1004
1005 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
1006 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
1007 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
1008 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
1009 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
1010 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
1011 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
1012 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
1013
1014 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
1015 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
1016
1017 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1018 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1019 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1020 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1021 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1022 }
1023
1024 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
1025 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1026 : &X86::VR128RegClass);
1027
1028 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
1029 // registers cannot be used even for integer operations.
1030 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
1031 : &X86::VR128RegClass);
1032 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1033 : &X86::VR128RegClass);
1034 addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1035 : &X86::VR128RegClass);
1036 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1037 : &X86::VR128RegClass);
1038 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1039 : &X86::VR128RegClass);
1040
1041 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
1042 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
1043 setOperationAction(ISD::SDIV, VT, Custom);
1044 setOperationAction(ISD::SREM, VT, Custom);
1045 setOperationAction(ISD::UDIV, VT, Custom);
1046 setOperationAction(ISD::UREM, VT, Custom);
1047 }
1048
1049 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
1050 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
1051 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
1052
1053 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1054 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1055 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1056 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
1057 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
1058 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
1059 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
1060 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
1061 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
1062 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1063 setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
1064 setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
1065
1066 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
1067 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
1068 setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
1069
1070 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1071 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
1072 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
1073
1074 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1075 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
1076 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
1077 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
1078 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
1079 }
1080
1081 setOperationAction(ISD::ABDU, MVT::v16i8, Custom);
1082 setOperationAction(ISD::ABDS, MVT::v8i16, Custom);
1083
1084 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
1085 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
1086 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
1087 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
1088 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
1089 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
1090 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
1091 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
1092 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
1093 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
1094
1095 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1096 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1097 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1098 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1099
1100 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1101 setOperationAction(ISD::SETCC, VT, Custom);
1102 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1103 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1104 setOperationAction(ISD::CTPOP, VT, Custom);
1105 setOperationAction(ISD::ABS, VT, Custom);
1106
1107 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1108 // setcc all the way to isel and prefer SETGT in some isel patterns.
1109 setCondCodeAction(ISD::SETLT, VT, Custom);
1110 setCondCodeAction(ISD::SETLE, VT, Custom);
1111 }
1112
1113 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1114 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1115 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1116 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1117 setOperationAction(ISD::VSELECT, VT, Custom);
1118 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1119 }
1120
1121 for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
1122 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1123 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1124 setOperationAction(ISD::VSELECT, VT, Custom);
1125
1126 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1127 continue;
1128
1129 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1130 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1131 }
1132 setF16Action(MVT::v8f16, Expand);
1133 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
1134 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
1135 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
1136 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
1137
1138 // Custom lower v2f64, v2i64, v4i32, v8i16, v8f16 and v16i8 selects.
1139 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1140 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1141 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
1142 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
1143 setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
1144 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
1145
1146 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
1147 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
1148 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1149 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1150 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
1151 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1152
1153 // Custom legalize these to avoid over promotion or custom promotion.
1154 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1155 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1156 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1157 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1158 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1159 }
1160
1161 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1162 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
1163 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1164 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1165
1166 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1167 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1168
1169 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1170 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1171
1172 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1173 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1174 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1175 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1176 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1177
1178 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1179 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1180 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1181 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1182
1183 // We want to legalize this to an f64 load rather than an i64 load on
1184 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1185 // store.
1186 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1187 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1188 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1189 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1190 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1191 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1192
1193 // Add 32-bit vector stores to help vectorization opportunities.
1194 setOperationAction(ISD::STORE, MVT::v2i16, Custom);
1195 setOperationAction(ISD::STORE, MVT::v4i8, Custom);
1196
1197 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1198 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1199 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1200 if (!Subtarget.hasAVX512())
1201 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1202
1203 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1204 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1205 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1206
1207 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1208
1209 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1210 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1211 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1212 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1213 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1214 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1215
1216 // In the customized shift lowering, the legal v4i32/v2i64 cases
1217 // in AVX2 will be recognized.
1218 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1219 setOperationAction(ISD::SRL, VT, Custom);
1220 setOperationAction(ISD::SHL, VT, Custom);
1221 setOperationAction(ISD::SRA, VT, Custom);
1222 if (VT == MVT::v2i64) continue;
1223 setOperationAction(ISD::ROTL, VT, Custom);
1224 setOperationAction(ISD::ROTR, VT, Custom);
1225 setOperationAction(ISD::FSHL, VT, Custom);
1226 setOperationAction(ISD::FSHR, VT, Custom);
1227 }
1228
1229 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1230 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1231 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1232 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1233 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1234 }
1235
1236 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1237 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1238 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1239 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1240 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1241 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1242 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1243 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1244 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1245
1246 // These might be better off as horizontal vector ops.
1247 setOperationAction(ISD::ADD, MVT::i16, Custom);
1248 setOperationAction(ISD::ADD, MVT::i32, Custom);
1249 setOperationAction(ISD::SUB, MVT::i16, Custom);
1250 setOperationAction(ISD::SUB, MVT::i32, Custom);
1251 }
1252
1253 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1254 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1255 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1256 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1257 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1258 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1259 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1260 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1261 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1262 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1263 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1264 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1265 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1266 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1267
1268 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1269 }
1270
1271 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1272 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1273 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1274 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1275 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1276 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1277 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1278 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1279
1280 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1281 setOperationAction(ISD::ABDS, VT, Custom);
1282 setOperationAction(ISD::ABDU, VT, Custom);
1283 }
1284
1285 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1286 setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
1287 setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
1288
1289 // FIXME: Do we need to handle scalar-to-vector here?
1290 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1291 setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
1292
1293 // We directly match byte blends in the backend as they match the VSELECT
1294 // condition form.
1295 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1296
1297 // SSE41 brings specific instructions for doing vector sign extend even in
1298 // cases where we don't have SRA.
1299 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1300 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1301 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1302 }
1303
1304 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1305 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1306 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1307 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1308 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1309 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1310 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1311 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1312 }
1313
1314 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1315 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1316 // do the pre and post work in the vector domain.
1317 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1318 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1319 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1320 // so that DAG combine doesn't try to turn it into uint_to_fp.
1321 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1322 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1323 }
1324 }
1325
1326 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1327 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1328 }
1329
1330 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1331 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1332 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1333 setOperationAction(ISD::ROTL, VT, Custom);
1334 setOperationAction(ISD::ROTR, VT, Custom);
1335 }
1336
1337 // XOP can efficiently perform BITREVERSE with VPPERM.
1338 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1339 setOperationAction(ISD::BITREVERSE, VT, Custom);
1340
1341 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1342 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1343 setOperationAction(ISD::BITREVERSE, VT, Custom);
1344 }
1345
1346 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1347 bool HasInt256 = Subtarget.hasInt256();
1348
1349 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1350 : &X86::VR256RegClass);
1351 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1352 : &X86::VR256RegClass);
1353 addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1354 : &X86::VR256RegClass);
1355 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1356 : &X86::VR256RegClass);
1357 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1358 : &X86::VR256RegClass);
1359 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1360 : &X86::VR256RegClass);
1361 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1362 : &X86::VR256RegClass);
1363
1364 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1365 setOperationAction(ISD::FFLOOR, VT, Legal);
1366 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1367 setOperationAction(ISD::FCEIL, VT, Legal);
1368 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1369 setOperationAction(ISD::FTRUNC, VT, Legal);
1370 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1371 setOperationAction(ISD::FRINT, VT, Legal);
1372 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1373 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1374 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1375 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1376 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1377
1378 setOperationAction(ISD::FROUND, VT, Custom);
1379
1380 setOperationAction(ISD::FNEG, VT, Custom);
1381 setOperationAction(ISD::FABS, VT, Custom);
1382 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1383 }
1384
1385 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1386 // even though v8i16 is a legal type.
1387 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1388 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1389 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1390 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1391 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
1392 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
1393 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
1394
1395 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
1396 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
1397 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
1398 setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
1399 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
1400 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
1401
1402 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1403 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1404 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1405 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1406 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1407 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1408 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1409 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1410 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1411 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1412 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1413
1414 if (!Subtarget.hasAVX512())
1415 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1416
1417 // In the customized shift lowering, the legal v8i32/v4i64 cases
1418 // in AVX2 will be recognized.
1419 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1420 setOperationAction(ISD::SRL, VT, Custom);
1421 setOperationAction(ISD::SHL, VT, Custom);
1422 setOperationAction(ISD::SRA, VT, Custom);
1423 setOperationAction(ISD::ABDS, VT, Custom);
1424 setOperationAction(ISD::ABDU, VT, Custom);
1425 if (VT == MVT::v4i64) continue;
1426 setOperationAction(ISD::ROTL, VT, Custom);
1427 setOperationAction(ISD::ROTR, VT, Custom);
1428 setOperationAction(ISD::FSHL, VT, Custom);
1429 setOperationAction(ISD::FSHR, VT, Custom);
1430 }
1431
1432 // These types need custom splitting if their input is a 128-bit vector.
1433 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1434 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1435 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1436 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1437
1438 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1439 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1440 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1441 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1442 setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
1443 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1444 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1445
1446 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1447 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1448 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1449 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1450 }
1451
1452 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1453 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1454 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1455 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1456
1457 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1458 setOperationAction(ISD::SETCC, VT, Custom);
1459 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1460 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1461 setOperationAction(ISD::CTPOP, VT, Custom);
1462 setOperationAction(ISD::CTLZ, VT, Custom);
1463
1464 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1465 // setcc all the way to isel and prefer SETGT in some isel patterns.
1466 setCondCodeAction(ISD::SETLT, VT, Custom);
1467 setCondCodeAction(ISD::SETLE, VT, Custom);
1468 }
1469
1470 if (Subtarget.hasAnyFMA()) {
1471 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1472 MVT::v2f64, MVT::v4f64 }) {
1473 setOperationAction(ISD::FMA, VT, Legal);
1474 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1475 }
1476 }
1477
1478 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1479 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1480 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1481 }
1482
1483 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1484 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1485 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1486 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1487
1488 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1489 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1490 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1491 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1492 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1493 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1494 setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
1495 setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
1496
1497 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1498 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1499
1500 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1501 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1502 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1503 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1504 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1505
1506 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1507 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1508 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1509 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1510 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1511 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1512 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1513 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1514 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1515 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1516 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1517 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1518
1519 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1520 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1521 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1522 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1523 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1524 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1525 }
1526
1527 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1528 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1529 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1530 }
1531
1532 if (HasInt256) {
1533 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1534 // when we have a 256bit-wide blend with immediate.
1535 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1536 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1537
1538 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1539 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1540 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1541 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1542 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1543 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1544 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1545 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1546 }
1547 }
1548
1549 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1550 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1551 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1552 setOperationAction(ISD::MSTORE, VT, Legal);
1553 }
1554
1555 // Extract subvector is special because the value type
1556 // (result) is 128-bit but the source is 256-bit wide.
1557 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1558 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1559 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1560 }
1561
1562 // Custom lower several nodes for 256-bit types.
1563 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1564 MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
1565 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1566 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1567 setOperationAction(ISD::VSELECT, VT, Custom);
1568 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1569 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1570 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1571 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1572 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1573 setOperationAction(ISD::STORE, VT, Custom);
1574 }
1575 setF16Action(MVT::v16f16, Expand);
1576 setOperationAction(ISD::FADD, MVT::v16f16, Expand);
1577 setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
1578 setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
1579 setOperationAction(ISD::FDIV, MVT::v16f16, Expand);
1580
1581 if (HasInt256) {
1582 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1583
1584 // Custom legalize 2x32 to get a little better code.
1585 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1586 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1587
1588 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1589 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1590 setOperationAction(ISD::MGATHER, VT, Custom);
1591 }
1592 }
1593
1594 if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
1595 Subtarget.hasF16C()) {
1596 for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
1597 setOperationAction(ISD::FP_ROUND, VT, Custom);
1598 setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1599 }
1600 for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) {
1601 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1602 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
1603 }
1604 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1605 setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
1606 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1607 }
1608
1609 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1610 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
1611 }
1612
1613 // This block controls legalization of the mask vector sizes that are
1614 // available with AVX512. 512-bit vectors are in a separate block controlled
1615 // by useAVX512Regs.
1616 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1617 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1618 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1619 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1620 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1621 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1622
1623 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1624 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1625 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1626
1627 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1628 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1629 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1630 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1631 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1632 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1633 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1634 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1635 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1636 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1637 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1638 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1639
1640 // There is no byte sized k-register load or store without AVX512DQ.
1641 if (!Subtarget.hasDQI()) {
1642 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1643 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1644 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1645 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1646
1647 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1648 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1649 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1650 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1651 }
1652
1653 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1654 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1655 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1656 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1657 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1658 }
1659
1660 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1661 setOperationAction(ISD::VSELECT, VT, Expand);
1662
1663 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1664 setOperationAction(ISD::SETCC, VT, Custom);
1665 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1666 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1667 setOperationAction(ISD::SELECT, VT, Custom);
1668 setOperationAction(ISD::TRUNCATE, VT, Custom);
1669
1670 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1671 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1672 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1673 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1674 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1675 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1676 }
1677
1678 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1679 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1680 }
1681
1682 // This block controls legalization for 512-bit operations with 32/64 bit
1683 // elements. 512-bits can be disabled based on prefer-vector-width and
1684 // required-vector-width function attributes.
1685 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1686 bool HasBWI = Subtarget.hasBWI();
1687
1688 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1689 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1690 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1691 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1692 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1693 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1694 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1695
1696 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1697 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1698 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1699 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1700 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1701 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1702 if (HasBWI)
1703 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1704 }
1705
1706 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1707 setOperationAction(ISD::FNEG, VT, Custom);
1708 setOperationAction(ISD::FABS, VT, Custom);
1709 setOperationAction(ISD::FMA, VT, Legal);
1710 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1711 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1712 }
1713
1714 for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
1715 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1716 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1717 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1718 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1719 }
1720
1721 for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
1722 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1723 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1724 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1725 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1726 }
1727
1728 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
1729 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
1730 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
1731 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom);
1732 setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
1733 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
1734
1735 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1736 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1737 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1738 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1739 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1740 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1741 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1742 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1743 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1744 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1745 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1746
1747 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1748 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1749 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1750 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1751 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1752 if (HasBWI)
1753 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1754
1755 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1756 // to 512-bit rather than use the AVX2 instructions so that we can use
1757 // k-masks.
1758 if (!Subtarget.hasVLX()) {
1759 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1760 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1761 setOperationAction(ISD::MLOAD, VT, Custom);
1762 setOperationAction(ISD::MSTORE, VT, Custom);
1763 }
1764 }
1765
1766 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1767 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1768 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1769 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1770 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1771 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1772 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1773 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1774 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1775 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1776 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1777 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1778 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1779
1780 if (HasBWI) {
1781 // Extends from v64i1 masks to 512-bit vectors.
1782 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1783 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1784 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1785 }
1786
1787 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1788 setOperationAction(ISD::FFLOOR, VT, Legal);
1789 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1790 setOperationAction(ISD::FCEIL, VT, Legal);
1791 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1792 setOperationAction(ISD::FTRUNC, VT, Legal);
1793 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1794 setOperationAction(ISD::FRINT, VT, Legal);
1795 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1796 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1797 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1798 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1799 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1800
1801 setOperationAction(ISD::FROUND, VT, Custom);
1802 }
1803
1804 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1805 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1806 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1807 }
1808
1809 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1810 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1811 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1812 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1813
1814 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1815 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1816 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1817 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1818
1819 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1820 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1821 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1822 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1823 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1824 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1825 setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
1826 setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
1827
1828 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1829 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1830
1831 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1832
1833 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1834 setOperationAction(ISD::SRL, VT, Custom);
1835 setOperationAction(ISD::SHL, VT, Custom);
1836 setOperationAction(ISD::SRA, VT, Custom);
1837 setOperationAction(ISD::ROTL, VT, Custom);
1838 setOperationAction(ISD::ROTR, VT, Custom);
1839 setOperationAction(ISD::SETCC, VT, Custom);
1840 setOperationAction(ISD::ABDS, VT, Custom);
1841 setOperationAction(ISD::ABDU, VT, Custom);
1842
1843 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1844 // setcc all the way to isel and prefer SETGT in some isel patterns.
1845 setCondCodeAction(ISD::SETLT, VT, Custom);
1846 setCondCodeAction(ISD::SETLE, VT, Custom);
1847 }
1848 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1849 setOperationAction(ISD::SMAX, VT, Legal);
1850 setOperationAction(ISD::UMAX, VT, Legal);
1851 setOperationAction(ISD::SMIN, VT, Legal);
1852 setOperationAction(ISD::UMIN, VT, Legal);
1853 setOperationAction(ISD::ABS, VT, Legal);
1854 setOperationAction(ISD::CTPOP, VT, Custom);
1855 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1856 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1857 }
1858
1859 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1860 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1861 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1862 setOperationAction(ISD::CTLZ, VT, Custom);
1863 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1864 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1865 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1866 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1867 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1868 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1869 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1870 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1871 }
1872
1873 setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
1874 setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
1875 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1876 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1877 setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
1878 setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
1879
1880 if (Subtarget.hasDQI()) {
1881 for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
1882 ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1883 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
1884 setOperationAction(Opc, MVT::v8i64, Custom);
1885 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1886 }
1887
1888 if (Subtarget.hasCDI()) {
1889 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1890 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1891 setOperationAction(ISD::CTLZ, VT, Legal);
1892 }
1893 } // Subtarget.hasCDI()
1894
1895 if (Subtarget.hasVPOPCNTDQ()) {
1896 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1897 setOperationAction(ISD::CTPOP, VT, Legal);
1898 }
1899
1900 // Extract subvector is special because the value type
1901 // (result) is 256-bit but the source is 512-bit wide.
1902 // 128-bit was made Legal under AVX1.
1903 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1904 MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1905 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1906
1907 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1908 MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
1909 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1910 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1911 setOperationAction(ISD::SELECT, VT, Custom);
1912 setOperationAction(ISD::VSELECT, VT, Custom);
1913 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1914 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1915 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1916 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1917 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1918 }
1919 setF16Action(MVT::v32f16, Expand);
1920 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom);
1921 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom);
1922 setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
1923 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
1924 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1925 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1926 setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
1927 }
1928
1929 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1930 setOperationAction(ISD::MLOAD, VT, Legal);
1931 setOperationAction(ISD::MSTORE, VT, Legal);
1932 setOperationAction(ISD::MGATHER, VT, Custom);
1933 setOperationAction(ISD::MSCATTER, VT, Custom);
1934 }
1935 if (HasBWI) {
1936 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1937 setOperationAction(ISD::MLOAD, VT, Legal);
1938 setOperationAction(ISD::MSTORE, VT, Legal);
1939 }
1940 } else {
1941 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1942 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1943 }
1944
1945 if (Subtarget.hasVBMI2()) {
1946 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1947 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1948 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1949 setOperationAction(ISD::FSHL, VT, Custom);
1950 setOperationAction(ISD::FSHR, VT, Custom);
1951 }
1952
1953 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1954 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1955 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1956 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1957 }
1958 }// useAVX512Regs
1959
1960 // This block controls legalization for operations that don't have
1961 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1962 // narrower widths.
1963 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1964 // These operations are handled on non-VLX by artificially widening in
1965 // isel patterns.
1966
1967 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
1968 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
1969 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1970
1971 if (Subtarget.hasDQI()) {
1972 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1973 // v2f32 UINT_TO_FP is already custom under SSE2.
1974 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1976, __extension__
__PRETTY_FUNCTION__))
1975 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1976, __extension__
__PRETTY_FUNCTION__))
1976 "Unexpected operation action!")(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1976, __extension__
__PRETTY_FUNCTION__))
;
1977 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1978 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1979 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1980 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1981 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1982 }
1983
1984 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1985 setOperationAction(ISD::SMAX, VT, Legal);
1986 setOperationAction(ISD::UMAX, VT, Legal);
1987 setOperationAction(ISD::SMIN, VT, Legal);
1988 setOperationAction(ISD::UMIN, VT, Legal);
1989 setOperationAction(ISD::ABS, VT, Legal);
1990 }
1991
1992 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1993 setOperationAction(ISD::ROTL, VT, Custom);
1994 setOperationAction(ISD::ROTR, VT, Custom);
1995 }
1996
1997 // Custom legalize 2x32 to get a little better code.
1998 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1999 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
2000
2001 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
2002 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
2003 setOperationAction(ISD::MSCATTER, VT, Custom);
2004
2005 if (Subtarget.hasDQI()) {
2006 for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
2007 ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
2008 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) {
2009 setOperationAction(Opc, MVT::v2i64, Custom);
2010 setOperationAction(Opc, MVT::v4i64, Custom);
2011 }
2012 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
2013 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
2014 }
2015
2016 if (Subtarget.hasCDI()) {
2017 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
2018 setOperationAction(ISD::CTLZ, VT, Legal);
2019 }
2020 } // Subtarget.hasCDI()
2021
2022 if (Subtarget.hasVPOPCNTDQ()) {
2023 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
2024 setOperationAction(ISD::CTPOP, VT, Legal);
2025 }
2026 }
2027
2028 // This block controls legalization of v32i1/v64i1 which are available with
2029 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
2030 // useBWIRegs.
2031 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
2032 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
2033 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
2034
2035 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
2036 setOperationAction(ISD::VSELECT, VT, Expand);
2037 setOperationAction(ISD::TRUNCATE, VT, Custom);
2038 setOperationAction(ISD::SETCC, VT, Custom);
2039 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
2040 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
2041 setOperationAction(ISD::SELECT, VT, Custom);
2042 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2043 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2044 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
2045 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
2046 }
2047
2048 for (auto VT : { MVT::v16i1, MVT::v32i1 })
2049 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
2050
2051 // Extends from v32i1 masks to 256-bit vectors.
2052 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
2053 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
2054 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
2055
2056 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
2057 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
2058 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
2059 }
2060
2061 // These operations are handled on non-VLX by artificially widening in
2062 // isel patterns.
2063 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
2064
2065 if (Subtarget.hasBITALG()) {
2066 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
2067 setOperationAction(ISD::CTPOP, VT, Legal);
2068 }
2069 }
2070
2071 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
2072 auto setGroup = [&] (MVT VT) {
2073 setOperationAction(ISD::FADD, VT, Legal);
2074 setOperationAction(ISD::STRICT_FADD, VT, Legal);
2075 setOperationAction(ISD::FSUB, VT, Legal);
2076 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
2077 setOperationAction(ISD::FMUL, VT, Legal);
2078 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
2079 setOperationAction(ISD::FDIV, VT, Legal);
2080 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
2081 setOperationAction(ISD::FSQRT, VT, Legal);
2082 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
2083
2084 setOperationAction(ISD::FFLOOR, VT, Legal);
2085 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
2086 setOperationAction(ISD::FCEIL, VT, Legal);
2087 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
2088 setOperationAction(ISD::FTRUNC, VT, Legal);
2089 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
2090 setOperationAction(ISD::FRINT, VT, Legal);
2091 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
2092 setOperationAction(ISD::FNEARBYINT, VT, Legal);
2093 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
2094
2095 setOperationAction(ISD::FROUND, VT, Custom);
2096
2097 setOperationAction(ISD::LOAD, VT, Legal);
2098 setOperationAction(ISD::STORE, VT, Legal);
2099
2100 setOperationAction(ISD::FMA, VT, Legal);
2101 setOperationAction(ISD::STRICT_FMA, VT, Legal);
2102 setOperationAction(ISD::VSELECT, VT, Legal);
2103 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2104 setOperationAction(ISD::SELECT, VT, Custom);
2105
2106 setOperationAction(ISD::FNEG, VT, Custom);
2107 setOperationAction(ISD::FABS, VT, Custom);
2108 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
2109 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
2110 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2111 };
2112
2113 // AVX512_FP16 scalar operations
2114 setGroup(MVT::f16);
2115 setOperationAction(ISD::FREM, MVT::f16, Promote);
2116 setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
2117 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
2118 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
2119 setOperationAction(ISD::SETCC, MVT::f16, Custom);
2120 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
2121 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
2122 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
2123 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
2124 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
2125 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
2126 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
2127 setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
2128 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
2129
2130 setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
2131 setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
2132
2133 if (Subtarget.useAVX512Regs()) {
2134 setGroup(MVT::v32f16);
2135 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
2136 setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
2137 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
2138 setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
2139 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
2140 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
2141 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
2142 setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
2143 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
2144 setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal);
2145 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
2146 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
2147
2148 setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
2149 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
2150 setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
2151 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
2152 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
2153 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
2154 MVT::v32i16);
2155 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
2156 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
2157 MVT::v32i16);
2158 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
2159 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
2160 MVT::v32i16);
2161 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
2162 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
2163 MVT::v32i16);
2164
2165 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
2166 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
2167 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
2168
2169 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
2170 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2171
2172 setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
2173 setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
2174 }
2175
2176 if (Subtarget.hasVLX()) {
2177 setGroup(MVT::v8f16);
2178 setGroup(MVT::v16f16);
2179
2180 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
2181 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
2182 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
2183 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
2184 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
2185 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
2186 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
2187 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
2188 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
2189 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
2190
2191 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
2192 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
2193 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
2194 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
2195 setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
2196 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
2197 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
2198 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
2199 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
2200 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
2201
2202 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2203 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
2204 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
2205
2206 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
2207 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
2208 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
2209
2210 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2211 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2212 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2213 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2214
2215 // Need to custom widen these to prevent scalarization.
2216 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2217 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2218 }
2219 }
2220
2221 if (!Subtarget.useSoftFloat() &&
2222 (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
2223 addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
2224 addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
2225 // We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
2226 // provide the method to promote BUILD_VECTOR. Set the operation action
2227 // Custom to do the customization later.
2228 setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
2229 for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2230 setF16Action(VT, Expand);
2231 setOperationAction(ISD::FADD, VT, Expand);
2232 setOperationAction(ISD::FSUB, VT, Expand);
2233 setOperationAction(ISD::FMUL, VT, Expand);
2234 setOperationAction(ISD::FDIV, VT, Expand);
2235 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2236 }
2237 addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
2238 }
2239
2240 if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
2241 addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
2242 setF16Action(MVT::v32bf16, Expand);
2243 setOperationAction(ISD::FADD, MVT::v32bf16, Expand);
2244 setOperationAction(ISD::FSUB, MVT::v32bf16, Expand);
2245 setOperationAction(ISD::FMUL, MVT::v32bf16, Expand);
2246 setOperationAction(ISD::FDIV, MVT::v32bf16, Expand);
2247 setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom);
2248 }
2249
2250 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2251 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2252 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2253 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2254 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2255 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2256
2257 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2258 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2259 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2260 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2261 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2262
2263 if (Subtarget.hasBWI()) {
2264 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2265 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2266 }
2267
2268 if (Subtarget.hasFP16()) {
2269 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2270 setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
2271 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
2272 setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
2273 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
2274 setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
2275 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
2276 setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
2277 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
2278 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2279 setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
2280 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
2281 setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
2282 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
2283 setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
2284 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
2285 setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
2286 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
2287 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2288 setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
2289 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
2290 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
2291 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
2292 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2293 setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
2294 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
2295 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
2296 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
2297 }
2298
2299 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
2300 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
2301 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
2302 }
2303
2304 if (Subtarget.hasAMXTILE()) {
2305 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2306 }
2307
2308 // We want to custom lower some of our intrinsics.
2309 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
2310 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
2311 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
2312 if (!Subtarget.is64Bit()) {
2313 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
2314 }
2315
2316 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2317 // handle type legalization for these operations here.
2318 //
2319 // FIXME: We really should do custom legalization for addition and
2320 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2321 // than generic legalization for 64-bit multiplication-with-overflow, though.
2322 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2323 if (VT == MVT::i64 && !Subtarget.is64Bit())
2324 continue;
2325 // Add/Sub/Mul with overflow operations are custom lowered.
2326 setOperationAction(ISD::SADDO, VT, Custom);
2327 setOperationAction(ISD::UADDO, VT, Custom);
2328 setOperationAction(ISD::SSUBO, VT, Custom);
2329 setOperationAction(ISD::USUBO, VT, Custom);
2330 setOperationAction(ISD::SMULO, VT, Custom);
2331 setOperationAction(ISD::UMULO, VT, Custom);
2332
2333 // Support carry in as value rather than glue.
2334 setOperationAction(ISD::ADDCARRY, VT, Custom);
2335 setOperationAction(ISD::SUBCARRY, VT, Custom);
2336 setOperationAction(ISD::SETCCCARRY, VT, Custom);
2337 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
2338 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
2339 }
2340
2341 if (!Subtarget.is64Bit()) {
2342 // These libcalls are not available in 32-bit.
2343 setLibcallName(RTLIB::SHL_I128, nullptr);
2344 setLibcallName(RTLIB::SRL_I128, nullptr);
2345 setLibcallName(RTLIB::SRA_I128, nullptr);
2346 setLibcallName(RTLIB::MUL_I128, nullptr);
2347 // The MULO libcall is not part of libgcc, only compiler-rt.
2348 setLibcallName(RTLIB::MULO_I64, nullptr);
2349 }
2350 // The MULO libcall is not part of libgcc, only compiler-rt.
2351 setLibcallName(RTLIB::MULO_I128, nullptr);
2352
2353 // Combine sin / cos into _sincos_stret if it is available.
2354 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2355 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2356 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
2357 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
2358 }
2359
2360 if (Subtarget.isTargetWin64()) {
2361 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2362 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2363 setOperationAction(ISD::SREM, MVT::i128, Custom);
2364 setOperationAction(ISD::UREM, MVT::i128, Custom);
2365 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
2366 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
2367 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
2368 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
2369 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
2370 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
2371 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
2372 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
2373 }
2374
2375 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2376 // is. We should promote the value to 64-bits to solve this.
2377 // This is what the CRT headers do - `fmodf` is an inline header
2378 // function casting to f64 and calling `fmod`.
2379 if (Subtarget.is32Bit() &&
2380 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2381 for (ISD::NodeType Op :
2382 {ISD::FCEIL, ISD::STRICT_FCEIL,
2383 ISD::FCOS, ISD::STRICT_FCOS,
2384 ISD::FEXP, ISD::STRICT_FEXP,
2385 ISD::FFLOOR, ISD::STRICT_FFLOOR,
2386 ISD::FREM, ISD::STRICT_FREM,
2387 ISD::FLOG, ISD::STRICT_FLOG,
2388 ISD::FLOG10, ISD::STRICT_FLOG10,
2389 ISD::FPOW, ISD::STRICT_FPOW,
2390 ISD::FSIN, ISD::STRICT_FSIN})
2391 if (isOperationExpand(Op, MVT::f32))
2392 setOperationAction(Op, MVT::f32, Promote);
2393
2394 // We have target-specific dag combine patterns for the following nodes:
2395 setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
2396 ISD::SCALAR_TO_VECTOR,
2397 ISD::INSERT_VECTOR_ELT,
2398 ISD::EXTRACT_VECTOR_ELT,
2399 ISD::CONCAT_VECTORS,
2400 ISD::INSERT_SUBVECTOR,
2401 ISD::EXTRACT_SUBVECTOR,
2402 ISD::BITCAST,
2403 ISD::VSELECT,
2404 ISD::SELECT,
2405 ISD::SHL,
2406 ISD::SRA,
2407 ISD::SRL,
2408 ISD::OR,
2409 ISD::AND,
2410 ISD::ADD,
2411 ISD::FADD,
2412 ISD::FSUB,
2413 ISD::FNEG,
2414 ISD::FMA,
2415 ISD::STRICT_FMA,
2416 ISD::FMINNUM,
2417 ISD::FMAXNUM,
2418 ISD::SUB,
2419 ISD::LOAD,
2420 ISD::MLOAD,
2421 ISD::STORE,
2422 ISD::MSTORE,
2423 ISD::TRUNCATE,
2424 ISD::ZERO_EXTEND,
2425 ISD::ANY_EXTEND,
2426 ISD::SIGN_EXTEND,
2427 ISD::SIGN_EXTEND_INREG,
2428 ISD::ANY_EXTEND_VECTOR_INREG,
2429 ISD::SIGN_EXTEND_VECTOR_INREG,
2430 ISD::ZERO_EXTEND_VECTOR_INREG,
2431 ISD::SINT_TO_FP,
2432 ISD::UINT_TO_FP,
2433 ISD::STRICT_SINT_TO_FP,
2434 ISD::STRICT_UINT_TO_FP,
2435 ISD::SETCC,
2436 ISD::MUL,
2437 ISD::XOR,
2438 ISD::MSCATTER,
2439 ISD::MGATHER,
2440 ISD::FP16_TO_FP,
2441 ISD::FP_EXTEND,
2442 ISD::STRICT_FP_EXTEND,
2443 ISD::FP_ROUND,
2444 ISD::STRICT_FP_ROUND});
2445
2446 computeRegisterProperties(Subtarget.getRegisterInfo());
2447
2448 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2449 MaxStoresPerMemsetOptSize = 8;
2450 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2451 MaxStoresPerMemcpyOptSize = 4;
2452 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2453 MaxStoresPerMemmoveOptSize = 4;
2454
2455 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2456 // that needs to be benchmarked and balanced with the potential use of vector
2457 // load/store types (PR33329, PR33914).
2458 MaxLoadsPerMemcmp = 2;
2459 MaxLoadsPerMemcmpOptSize = 2;
2460
2461 // Default loop alignment, which can be overridden by -align-loops.
2462 setPrefLoopAlignment(Align(16));
2463
2464 // An out-of-order CPU can speculatively execute past a predictable branch,
2465 // but a conditional move could be stalled by an expensive earlier operation.
2466 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2467 EnableExtLdPromotion = true;
2468 setPrefFunctionAlignment(Align(16));
2469
2470 verifyIntrinsicTables();
2471
2472 // Default to having -disable-strictnode-mutation on
2473 IsStrictFPEnabled = true;
2474}
2475
2476// This has so far only been implemented for 64-bit MachO.
// Accordingly this returns true only when the subtarget is both MachO and
// 64-bit.
2477bool X86TargetLowering::useLoadStackGuardNode() const {
2478 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2479}
2480
/// Returns true when the target triple reports an MSVC CRT OS and the
/// subtarget is not MachO.
2481bool X86TargetLowering::useStackGuardXorFP() const {
2482 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2483 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2484}
2485
/// Creates a machine node that applies the XOR*_FP pseudo to \p Val and
/// returns result 0 of that node.
///
/// The opcode is X86::XOR64_FP on 64-bit subtargets and X86::XOR32_FP
/// otherwise; the node's result type is the pointer type of the DAG's data
/// layout.
2486SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2487 const SDLoc &DL) const {
2488 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2489 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2490 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2491 return SDValue(Node, 0);
2492}
2493
/// Chooses the type-legalization action for vector type \p VT.
///
/// The checks are evaluated in order; the first one that matches wins, and
/// anything left over is delegated to the TargetLoweringBase implementation.
2494TargetLoweringBase::LegalizeTypeAction
2495X86TargetLowering::getPreferredVectorAction(MVT VT) const {
 // v32i1/v64i1 are split when AVX512 is present but BWI is not.
2496 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2497 !Subtarget.hasBWI())
2498 return TypeSplitVector;
2499
 // Fixed-length f16 vectors with more than one element are split when F16C
 // is unavailable.
2500 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2501 !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)
2502 return TypeSplitVector;
2503
 // Any other fixed-length, multi-element vector whose element type is not i1
 // is widened.
2504 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2505 VT.getVectorElementType() != MVT::i1)
2506 return TypeWidenVector;
2507
2508 return TargetLoweringBase::getPreferredVectorAction(VT);
2509}
2510
/// Picks the register type and register count used to pass a vXi1 mask of
/// \p NumElts elements under calling convention \p CC.
///
/// \returns a {register type, number of registers} pair. The pair
/// {MVT::INVALID_SIMPLE_VALUE_TYPE, 0} means no rule here applied; the callers
/// in this file test for that value and fall through to their other handling.
2511static std::pair<MVT, unsigned>
2512handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2513 const X86Subtarget &Subtarget) {
2514 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2515 // convention is one that uses k registers.
2516 if (NumElts == 2)
2517 return {MVT::v2i64, 1};
2518 if (NumElts == 4)
2519 return {MVT::v4i32, 1};
 // The 8- and 16-element cases are skipped for X86_RegCall and Intel_OCL_BI.
2520 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2521 CC != CallingConv::Intel_OCL_BI)
2522 return {MVT::v8i16, 1};
2523 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2524 CC != CallingConv::Intel_OCL_BI)
2525 return {MVT::v16i8, 1};
2526 // v32i1 passes in ymm unless we have BWI and the calling convention is
2527 // regcall.
2528 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2529 return {MVT::v32i8, 1};
2530 // Split v64i1 vectors if we don't have v64i8 available.
2531 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2532 if (Subtarget.useAVX512Regs())
2533 return {MVT::v64i8, 1};
2534 return {MVT::v32i8, 2};
2535 }
2536
2537 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
 // One i8 register is used per element in this case.
2538 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2539 NumElts > 64)
2540 return {MVT::i8, NumElts};
2541
 // No special handling applies.
2542 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2543}
2544
/// Returns the register type used to pass a value of type \p VT under calling
/// convention \p CC.
///
/// X86-specific cases are tried first; anything not matched is delegated to
/// the TargetLowering implementation.
2545MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2546 CallingConv::ID CC,
2547 EVT VT) const {
2548 if (VT.isVector()) {
 // vXi1 masks with AVX512: use the helper's answer unless it reports
 // INVALID_SIMPLE_VALUE_TYPE.
2549 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
2550 unsigned NumElts = VT.getVectorNumElements();
2551
2552 MVT RegisterVT;
2553 unsigned NumRegisters;
2554 std::tie(RegisterVT, NumRegisters) =
2555 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2556 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2557 return RegisterVT;
2558 }
2559
 // f16 vectors with fewer than 8 elements are passed as v8f16.
2560 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
2561 return MVT::v8f16;
2562 }
2563
2564 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
2565 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
2566 !Subtarget.hasX87())
2567 return MVT::i32;
2568
 // bf16 vectors are answered by re-querying with the element type changed to
 // integer.
2569 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
2570 return getRegisterTypeForCallingConv(Context, CC,
2571 VT.changeVectorElementTypeToInteger());
2572
2573 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2574}
2575
/// Returns how many registers are used to pass a value of type \p VT under
/// calling convention \p CC.
///
/// The structure parallels getRegisterTypeForCallingConv: X86-specific cases
/// first, then the TargetLowering implementation.
2576unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2577 CallingConv::ID CC,
2578 EVT VT) const {
2579 if (VT.isVector()) {
 // vXi1 masks with AVX512: use the helper's register count unless it
 // reports INVALID_SIMPLE_VALUE_TYPE.
2580 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
2581 unsigned NumElts = VT.getVectorNumElements();
2582
2583 MVT RegisterVT;
2584 unsigned NumRegisters;
2585 std::tie(RegisterVT, NumRegisters) =
2586 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2587 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2588 return NumRegisters;
2589 }
2590
 // f16 vectors with fewer than 8 elements occupy a single register.
2591 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
2592 return 1;
2593 }
2594
2595 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
2596 // x87 is disabled.
2597 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
2598 if (VT == MVT::f64)
2599 return 2;
2600 if (VT == MVT::f80)
2601 return 3;
2602 }
2603
 // bf16 vectors are answered by re-querying with the element type changed to
 // integer.
2604 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
2605 return getNumRegistersForCallingConv(Context, CC,
2606 VT.changeVectorElementTypeToInteger());
2607
2608 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2609}
2610
/// Describes how vector type \p VT is broken down for calling convention
/// \p CC by filling in \p IntermediateVT, \p NumIntermediates and
/// \p RegisterVT.
///
/// Two vXi1 cases are handled here; everything else is delegated to the
/// TargetLowering implementation. In the two local cases the return value
/// equals the NumIntermediates that was stored.
2611unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2612 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2613 unsigned &NumIntermediates, MVT &RegisterVT) const {
2614 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
 // Each i1 element becomes its own intermediate, carried in an i8 register.
2615 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2616 Subtarget.hasAVX512() &&
2617 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2618 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2619 VT.getVectorNumElements() > 64)) {
2620 RegisterVT = MVT::i8;
2621 IntermediateVT = MVT::i1;
2622 NumIntermediates = VT.getVectorNumElements();
2623 return NumIntermediates;
2624 }
2625
2626 // Split v64i1 vectors if we don't have v64i8 available.
 // The result is two v32i1 halves, each carried in a v32i8 register.
2627 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2628 CC != CallingConv::X86_RegCall) {
2629 RegisterVT = MVT::v32i8;
2630 IntermediateVT = MVT::v32i1;
2631 NumIntermediates = 2;
2632 return 2;
2633 }
2634
2635 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2636 NumIntermediates, RegisterVT);
2637}
2638
/// Returns the result type of a SETCC whose operands have type \p VT.
///
/// Non-vector operands yield MVT::i8. Vector operands yield a vXi1 type with
/// the same element count as \p VT in the AVX512 cases below; otherwise they
/// yield \p VT with its element type changed to integer.
2639EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2640 LLVMContext& Context,
2641 EVT VT) const {
2642 if (!VT.isVector())
2643 return MVT::i8;
2644
2645 if (Subtarget.hasAVX512()) {
2646 // Figure out what this type will be legalized to.
 // Keep following the type-transformation chain until a legal type is hit.
2647 EVT LegalVT = VT;
2648 while (getTypeAction(Context, LegalVT) != TypeLegal)
2649 LegalVT = getTypeToTransformTo(Context, LegalVT);
2650
2651 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2652 if (LegalVT.getSimpleVT().is512BitVector())
2653 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2654
2655 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2656 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2657 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2658 // vXi16/vXi8.
2659 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2660 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2661 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2662 }
2663 }
2664
2665 return VT.changeVectorElementTypeToInteger();
2666}
2667
2668/// Helper for getByValTypeAlignment to determine
2669/// the desired ByVal argument alignment.
///
/// Walks \p Ty recursively and raises \p MaxAlign (an in/out parameter):
/// a vector type whose primitive size is 128 bits sets it to 16, an array
/// contributes its element type's result, and a struct contributes the
/// largest result among its members. Nothing is done if \p MaxAlign is
/// already 16 on entry, and the struct walk stops once 16 is reached.
2670static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2671 if (MaxAlign == 16)
2672 return;
2673 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2674 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
2675 MaxAlign = Align(16);
2676 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
 // Compute the element type's alignment into a fresh local, then keep the
 // larger of the two.
2677 Align EltAlign;
2678 getMaxByValAlign(ATy->getElementType(), EltAlign);
2679 if (EltAlign > MaxAlign)
2680 MaxAlign = EltAlign;
2681 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2682 for (auto *EltTy : STy->elements()) {
2683 Align EltAlign;
2684 getMaxByValAlign(EltTy, EltAlign);
2685 if (EltAlign > MaxAlign)
2686 MaxAlign = EltAlign;
2687 if (MaxAlign == 16)
2688 break;
2689 }
2690 }
2691}
2692
2693/// Return the desired alignment for ByVal aggregate
2694/// function arguments in the caller parameter area. For X86, aggregates
2695/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2696/// are at 4-byte boundaries.
///
/// The description above covers the non-64-bit path, where the walk for
/// vectors is only performed when SSE1 is available. On 64-bit subtargets the
/// result is instead the larger of 8 and the ABI alignment of \p Ty.
2697uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
2698 const DataLayout &DL) const {
2699 if (Subtarget.is64Bit()) {
2700 // Max of 8 and alignment of type.
2701 Align TyAlign = DL.getABITypeAlign(Ty);
2702 if (TyAlign > 8)
2703 return TyAlign.value();
2704 return 8;
2705 }
2706
 // Start from 4 and let getMaxByValAlign raise it when SSE1 is present.
2707 Align Alignment(4);
2708 if (Subtarget.hasSSE1())
2709 getMaxByValAlign(Ty, Alignment);
2710 return Alignment.value();
2711}
2712
2713/// It returns EVT::Other if the type should be determined using generic
2714/// target-independent logic.
2715/// For vector ops we check that the overall size isn't larger than our
2716/// preferred vector width.
///
/// NOTE(review): no return statement in this implementation yields an
/// "Other" type; every path returns a concrete MVT, ending with the
/// i64/i32 fallback. The first sentence above may be stale - confirm
/// against the base-class contract.
///
/// Vector and f64 types are only considered when the function does not carry
/// the NoImplicitFloat attribute.
2717EVT X86TargetLowering::getOptimalMemOpType(
2718 const MemOp &Op, const AttributeList &FuncAttributes) const {
2719 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
 // Operations of at least 16 bytes may use vector types, provided that
 // unaligned 16-byte accesses are not slow or the operation is 16-byte
 // aligned.
2720 if (Op.size() >= 16 &&
2721 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2722 // FIXME: Check if unaligned 64-byte accesses are slow.
2723 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2724 (Subtarget.getPreferVectorWidth() >= 512)) {
2725 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2726 }
2727 // FIXME: Check if unaligned 32-byte accesses are slow.
2728 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2729 Subtarget.useLight256BitInstructions()) {
2730 // Although this isn't a well-supported type for AVX1, we'll let
2731 // legalization and shuffle lowering produce the optimal codegen. If we
2732 // choose an optimal type with a vector element larger than a byte,
2733 // getMemsetStores() may create an intermediate splat (using an integer
2734 // multiply) before we splat as a vector.
2735 return MVT::v32i8;
2736 }
2737 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2738 return MVT::v16i8;
2739 // TODO: Can SSE1 handle a byte vector?
2740 // If we have SSE1 registers we should be able to use them.
2741 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2742 (Subtarget.getPreferVectorWidth() >= 128))
2743 return MVT::v4f32;
 // If none of the vector choices above applied, control falls through to
 // the integer fallback at the end of the function.
2744 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2745 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2746 // Do not use f64 to lower memcpy if source is string constant. It's
2747 // better to use i32 to avoid the loads.
2748 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2749 // The gymnastics of splatting a byte value into an XMM register and then
2750 // only using 8-byte stores (because this is a CPU with slow unaligned
2751 // 16-byte accesses) makes that a loser.
2752 return MVT::f64;
2753 }
2754 }
2755 // This is a compromise. If we reach here, unaligned accesses may be slow on
2756 // this target. However, creating smaller, aligned accesses could be even
2757 // slower and would certainly be a lot more code.
2758 if (Subtarget.is64Bit() && Op.size() >= 8)
2759 return MVT::i64;
2760 return MVT::i32;
2761}
2762
2763bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2764 if (VT == MVT::f32)
2765 return Subtarget.hasSSE1();
2766 if (VT == MVT::f64)
2767 return Subtarget.hasSSE2();
2768 return true;
2769}
2770
2771static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
2772 return (8 * Alignment.value()) % SizeInBits == 0;
2773}
2774
2775bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
2776 if (isBitAligned(Alignment, VT.getSizeInBits()))
2777 return true;
2778 switch (VT.getSizeInBits()) {
2779 default:
2780 // 8-byte and under are always assumed to be fast.
2781 return true;
2782 case 128:
2783 return !Subtarget.isUnalignedMem16Slow();
2784 case 256:
2785 return !Subtarget.isUnalignedMem32Slow();
2786 // TODO: What about AVX-512 (512-bit) accesses?
2787 }
2788}
2789
2790bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2791 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
2792 unsigned *Fast) const {
2793 if (Fast)
2794 *Fast = isMemoryAccessFast(VT, Alignment);
2795 // NonTemporal vector memory ops must be aligned.
2796 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2797 // NT loads can only be vector aligned, so if its less aligned than the
2798 // minimum vector size (which we can split the vector down to), we might as
2799 // well use a regular unaligned vector load.
2800 // We don't have any NT loads pre-SSE41.
2801 if (!!(Flags & MachineMemOperand::MOLoad))
2802 return (Alignment < 16 || !Subtarget.hasSSE41());
2803 return false;
2804 }
2805 // Misaligned accesses of any size are always allowed.
2806 return true;
2807}
2808
2809bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
2810 const DataLayout &DL, EVT VT,
2811 unsigned AddrSpace, Align Alignment,
2812 MachineMemOperand::Flags Flags,
2813 unsigned *Fast) const {
2814 if (Fast)
2815 *Fast = isMemoryAccessFast(VT, Alignment);
2816 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2817 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
2818 /*Fast=*/nullptr))
2819 return true;
2820 // NonTemporal vector memory ops are special, and must be aligned.
2821 if (!isBitAligned(Alignment, VT.getSizeInBits()))
2822 return false;
2823 switch (VT.getSizeInBits()) {
2824 case 128:
2825 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
2826 return true;
2827 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
2828 return true;
2829 return false;
2830 case 256:
2831 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
2832 return true;
2833 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
2834 return true;
2835 return false;
2836 case 512:
2837 if (Subtarget.hasAVX512())
2838 return true;
2839 return false;
2840 default:
2841 return false; // Don't have NonTemporal vector memory ops of this size.
2842 }
2843 }
2844 return true;
2845}
2846
2847/// Return the entry encoding for a jump table in the
2848/// current function. The returned value is a member of the
2849/// MachineJumpTableInfo::JTEntryKind enum.
2850unsigned X86TargetLowering::getJumpTableEncoding() const {
2851 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2852 // symbol.
2853 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2854 return MachineJumpTableInfo::EK_Custom32;
2855
2856 // Otherwise, use the normal jump table encoding heuristics.
2857 return TargetLowering::getJumpTableEncoding();
2858}
2859
2860bool X86TargetLowering::splitValueIntoRegisterParts(
2861 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
2862 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
2863 bool IsABIRegCopy = CC.has_value();
2864 EVT ValueVT = Val.getValueType();
2865 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
2866 unsigned ValueBits = ValueVT.getSizeInBits();
2867 unsigned PartBits = PartVT.getSizeInBits();
2868 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
2869 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
2870 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
2871 Parts[0] = Val;
2872 return true;
2873 }
2874 return false;
2875}
2876
2877SDValue X86TargetLowering::joinRegisterPartsIntoValue(
2878 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
2879 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
2880 bool IsABIRegCopy = CC.has_value();
2881 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
2882 unsigned ValueBits = ValueVT.getSizeInBits();
2883 unsigned PartBits = PartVT.getSizeInBits();
2884 SDValue Val = Parts[0];
2885
2886 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
2887 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
2888 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
2889 return Val;
2890 }
2891 return SDValue();
2892}
2893
2894bool X86TargetLowering::useSoftFloat() const {
2895 return Subtarget.useSoftFloat();
2896}
2897
2898void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2899 ArgListTy &Args) const {
2900
2901 // Only relabel X86-32 for C / Stdcall CCs.
2902 if (Subtarget.is64Bit())
2903 return;
2904 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2905 return;
2906 unsigned ParamRegs = 0;
2907 if (auto *M = MF->getFunction().getParent())
2908 ParamRegs = M->getNumberRegisterParameters();
2909
2910 // Mark the first N int arguments as having reg
2911 for (auto &Arg : Args) {
2912 Type *T = Arg.Ty;
2913 if (T->isIntOrPtrTy())
2914 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2915 unsigned numRegs = 1;
2916 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2917 numRegs = 2;
2918 if (ParamRegs < numRegs)
2919 return;
2920 ParamRegs -= numRegs;
2921 Arg.IsInReg = true;
2922 }
2923 }
2924}
2925
2926const MCExpr *
2927X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2928 const MachineBasicBlock *MBB,
2929 unsigned uid,MCContext &Ctx) const{
2930 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())(static_cast <bool> (isPositionIndependent() &&
Subtarget.isPICStyleGOT()) ? void (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2930, __extension__
__PRETTY_FUNCTION__))
;
2931 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2932 // entries.
2933 return MCSymbolRefExpr::create(MBB->getSymbol(),
2934 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2935}
2936
2937/// Returns relocation base for the given PIC jumptable.
2938SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2939 SelectionDAG &DAG) const {
2940 if (!Subtarget.is64Bit())
2941 // This doesn't have SDLoc associated with it, but is not really the
2942 // same as a Register.
2943 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2944 getPointerTy(DAG.getDataLayout()));
2945 return Table;
2946}
2947
2948/// This returns the relocation base for the given PIC jumptable,
2949/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2950const MCExpr *X86TargetLowering::
2951getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2952 MCContext &Ctx) const {
2953 // X86-64 uses RIP relative addressing based on the jump table label.
2954 if (Subtarget.isPICStyleRIPRel())
2955 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2956
2957 // Otherwise, the reference is relative to the PIC base.
2958 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2959}
2960
2961std::pair<const TargetRegisterClass *, uint8_t>
2962X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2963 MVT VT) const {
2964 const TargetRegisterClass *RRC = nullptr;
2965 uint8_t Cost = 1;
2966 switch (VT.SimpleTy) {
2967 default:
2968 return TargetLowering::findRepresentativeClass(TRI, VT);
2969 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2970 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2971 break;
2972 case MVT::x86mmx:
2973 RRC = &X86::VR64RegClass;
2974 break;
2975 case MVT::f32: case MVT::f64:
2976 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2977 case MVT::v4f32: case MVT::v2f64:
2978 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2979 case MVT::v8f32: case MVT::v4f64:
2980 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2981 case MVT::v16f32: case MVT::v8f64:
2982 RRC = &X86::VR128XRegClass;
2983 break;
2984 }
2985 return std::make_pair(RRC, Cost);
2986}
2987
2988unsigned X86TargetLowering::getAddressSpace() const {
2989 if (Subtarget.is64Bit())
2990 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2991 return 256;
2992}
2993
2994static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2995 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2996 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2997}
2998
2999static Constant* SegmentOffset(IRBuilderBase &IRB,
3000 int Offset, unsigned AddressSpace) {
3001 return ConstantExpr::getIntToPtr(
3002 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
3003 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
3004}
3005
3006Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
3007 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
3008 // tcbhead_t; use it instead of the usual global variable (see
3009 // sysdeps/{i386,x86_64}/nptl/tls.h)
3010 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
3011 if (Subtarget.isTargetFuchsia()) {
3012 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
3013 return SegmentOffset(IRB, 0x10, getAddressSpace());
3014 } else {
3015 unsigned AddressSpace = getAddressSpace();
3016 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
3017 // Specially, some users may customize the base reg and offset.
3018 int Offset = M->getStackProtectorGuardOffset();
3019 // If we don't set -stack-protector-guard-offset value:
3020 // %fs:0x28, unless we're using a Kernel code model, in which case
3021 // it's %gs:0x28. gs:0x14 on i386.
3022 if (Offset == INT_MAX2147483647)
3023 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
3024
3025 StringRef GuardReg = M->getStackProtectorGuardReg();
3026 if (GuardReg == "fs")
3027 AddressSpace = X86AS::FS;
3028 else if (GuardReg == "gs")
3029 AddressSpace = X86AS::GS;
3030
3031 // Use symbol guard if user specify.
3032 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
3033 if (!GuardSymb.empty()) {
3034 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
3035 if (!GV) {
3036 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
3037 : Type::getInt32Ty(M->getContext());
3038 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
3039 nullptr, GuardSymb, nullptr,
3040 GlobalValue::NotThreadLocal, AddressSpace);
3041 }
3042 return GV;
3043 }
3044
3045 return SegmentOffset(IRB, Offset, AddressSpace);
3046 }
3047 }
3048 return TargetLowering::getIRStackGuard(IRB);
3049}
3050
3051void X86TargetLowering::insertSSPDeclarations(Module &M) const {
3052 // MSVC CRT provides functionalities for stack protection.
3053 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
3054 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
3055 // MSVC CRT has a global variable holding security cookie.
3056 M.getOrInsertGlobal("__security_cookie",
3057 Type::getInt8PtrTy(M.getContext()));
3058
3059 // MSVC CRT has a function to validate security cookie.
3060 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
3061 "__security_check_cookie", Type::getVoidTy(M.getContext()),
3062 Type::getInt8PtrTy(M.getContext()));
3063 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
3064 F->setCallingConv(CallingConv::X86_FastCall);
3065 F->addParamAttr(0, Attribute::AttrKind::InReg);
3066 }
3067 return;
3068 }
3069
3070 StringRef GuardMode = M.getStackProtectorGuard();
3071
3072 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
3073 if ((GuardMode == "tls" || GuardMode.empty()) &&
3074 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
3075 return;
3076 TargetLowering::insertSSPDeclarations(M);
3077}
3078
3079Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
3080 // MSVC CRT has a global variable holding security cookie.
3081 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
3082 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
3083 return M.getGlobalVariable("__security_cookie");
3084 }
3085 return TargetLowering::getSDagStackGuard(M);
3086}
3087
3088Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
3089 // MSVC CRT has a function to validate security cookie.
3090 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
3091 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
3092 return M.getFunction("__security_check_cookie");
3093 }
3094 return TargetLowering::getSSPStackGuardCheck(M);
3095}
3096
3097Value *
3098X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
3099 if (Subtarget.getTargetTriple().isOSContiki())
3100 return getDefaultSafeStackPointerLocation(IRB, false);
3101
3102 // Android provides a fixed TLS slot for the SafeStack pointer. See the
3103 // definition of TLS_SLOT_SAFESTACK in
3104 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
3105 if (Subtarget.isTargetAndroid()) {
3106 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
3107 // %gs:0x24 on i386
3108 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
3109 return SegmentOffset(IRB, Offset, getAddressSpace());
3110 }
3111
3112 // Fuchsia is similar.
3113 if (Subtarget.isTargetFuchsia()) {
3114 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
3115 return SegmentOffset(IRB, 0x18, getAddressSpace());
3116 }
3117
3118 return TargetLowering::getSafeStackPointerLocation(IRB);
3119}
3120
3121//===----------------------------------------------------------------------===//
3122// Return Value Calling Convention Implementation
3123//===----------------------------------------------------------------------===//
3124
3125bool X86TargetLowering::CanLowerReturn(
3126 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3127 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3128 SmallVector<CCValAssign, 16> RVLocs;
3129 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3130 return CCInfo.CheckReturn(Outs, RetCC_X86);
3131}
3132
3133const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
3134 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
3135 return ScratchRegs;
3136}
3137
3138ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
3139 // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
3140 // tests at the moment, which is not what we expected.
3141 static const MCPhysReg RCRegs[] = {X86::MXCSR};
3142 return RCRegs;
3143}
3144
3145/// Lowers masks values (v*i1) to the local register values
3146/// \returns DAG node after lowering to register type
3147static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
3148 const SDLoc &Dl, SelectionDAG &DAG) {
3149 EVT ValVT = ValArg.getValueType();
3150
3151 if (ValVT == MVT::v1i1)
3152 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
3153 DAG.getIntPtrConstant(0, Dl));
3154
3155 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
3156 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
3157 // Two stage lowering might be required
3158 // bitcast: v8i1 -> i8 / v16i1 -> i16
3159 // anyextend: i8 -> i32 / i16 -> i32
3160 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
3161 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
3162 if (ValLoc == MVT::i32)
3163 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
3164 return ValToCopy;
3165 }
3166
3167 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
3168 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
3169 // One stage lowering is required
3170 // bitcast: v32i1 -> i32 / v64i1 -> i64
3171 return DAG.getBitcast(ValLoc, ValArg);
3172 }
3173
3174 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
3175}
3176
3177/// Breaks v64i1 value into two registers and adds the new node to the DAG
3178static void Passv64i1ArgInRegs(
3179 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
3180 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
3181 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
3182 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")(static_cast <bool> (Subtarget.hasBWI() && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3182, __extension__
__PRETTY_FUNCTION__))
;
3183 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3183, __extension__
__PRETTY_FUNCTION__))
;
3184 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")(static_cast <bool> (Arg.getValueType() == MVT::i64 &&
"Expecting 64 bit value") ? void (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3184, __extension__
__PRETTY_FUNCTION__))
;
3185 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3186, __extension__
__PRETTY_FUNCTION__))
3186 "The value should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3186, __extension__
__PRETTY_FUNCTION__))
;
3187
3188 // Before splitting the value we cast it to i64
3189 Arg = DAG.getBitcast(MVT::i64, Arg);
3190
3191 // Splitting the value into two i32 types
3192 SDValue Lo, Hi;
3193 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, Dl, MVT::i32, MVT::i32);
3194
3195 // Attach the two i32 types into corresponding registers
3196 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
3197 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
3198}
3199
3200SDValue
3201X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3202 bool isVarArg,
3203 const SmallVectorImpl<ISD::OutputArg> &Outs,
3204 const SmallVectorImpl<SDValue> &OutVals,
3205 const SDLoc &dl, SelectionDAG &DAG) const {
3206 MachineFunction &MF = DAG.getMachineFunction();
3207 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3208
3209 // In some cases we need to disable registers from the default CSR list.
3210 // For example, when they are used as return registers (preserve_* and X86's
3211 // regcall) or for argument passing (X86's regcall).
3212 bool ShouldDisableCalleeSavedRegister =
3213 shouldDisableRetRegFromCSR(CallConv) ||
3214 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
3215
3216 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
3217 report_fatal_error("X86 interrupts may not return any value");
3218
3219 SmallVector<CCValAssign, 16> RVLocs;
3220 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
3221 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
3222
3223 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
3224 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
3225 ++I, ++OutsIndex) {
3226 CCValAssign &VA = RVLocs[I];
3227 assert(VA.isRegLoc() && "Can only return in registers!")(static_cast <bool> (VA.isRegLoc() && "Can only return in registers!"
) ? void (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3227, __extension__
__PRETTY_FUNCTION__))
;
3228
3229 // Add the register to the CalleeSaveDisableRegs list.
3230 if (ShouldDisableCalleeSavedRegister)
3231 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
3232
3233 SDValue ValToCopy = OutVals[OutsIndex];
3234 EVT ValVT = ValToCopy.getValueType();
3235
3236 // Promote values to the appropriate types.
3237 if (VA.getLocInfo() == CCValAssign::SExt)
3238 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
3239 else if (VA.getLocInfo() == CCValAssign::ZExt)
3240 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
3241 else if (VA.getLocInfo() == CCValAssign::AExt) {
3242 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
3243 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
3244 else
3245 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
3246 }
3247 else if (VA.getLocInfo() == CCValAssign::BCvt)
3248 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
3249
3250 assert(VA.getLocInfo() != CCValAssign::FPExt &&(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3251, __extension__
__PRETTY_FUNCTION__))
3251 "Unexpected FP-extend for return value.")(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3251, __extension__
__PRETTY_FUNCTION__))
;
3252
3253 // Report an error if we have attempted to return a value via an XMM
3254 // register and SSE was disabled.
3255 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3256 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3257 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3258 } else if (!Subtarget.hasSSE2() &&
3259 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3260 ValVT == MVT::f64) {
3261 // When returning a double via an XMM register, report an error if SSE2 is
3262 // not enabled.
3263 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3264 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3265 }
3266
3267 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
3268 // the RET instruction and handled by the FP Stackifier.
3269 if (VA.getLocReg() == X86::FP0 ||
3270 VA.getLocReg() == X86::FP1) {
3271 // If this is a copy from an xmm register to ST(0), use an FPExtend to
3272 // change the value to the FP stack register class.
3273 if (isScalarFPTypeInSSEReg(VA.getValVT()))
3274 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
3275 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3276 // Don't emit a copytoreg.
3277 continue;
3278 }
3279
3280 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
3281 // which is returned in RAX / RDX.
3282 if (Subtarget.is64Bit()) {
3283 if (ValVT == MVT::x86mmx) {
3284 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
3285 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
3286 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
3287 ValToCopy);
3288 // If we don't have SSE2 available, convert to v4f32 so the generated
3289 // register is legal.
3290 if (!Subtarget.hasSSE2())
3291 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
3292 }
3293 }
3294 }
3295
3296 if (VA.needsCustom()) {
3297 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3298, __extension__
__PRETTY_FUNCTION__))
3298 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3298, __extension__
__PRETTY_FUNCTION__))
;
3299
3300 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
3301 Subtarget);
3302
3303 // Add the second register to the CalleeSaveDisableRegs list.
3304 if (ShouldDisableCalleeSavedRegister)
3305 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
3306 } else {
3307 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3308 }
3309 }
3310
3311 SDValue Glue;
3312 SmallVector<SDValue, 6> RetOps;
3313 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3314 // Operand #1 = Bytes To Pop
3315 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
3316 MVT::i32));
3317
3318 // Copy the result values into the output registers.
3319 for (auto &RetVal : RetVals) {
3320 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
3321 RetOps.push_back(RetVal.second);
3322 continue; // Don't emit a copytoreg.
3323 }
3324
3325 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
3326 Glue = Chain.getValue(1);
3327 RetOps.push_back(
3328 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
3329 }
3330
3331 // Swift calling convention does not require we copy the sret argument
3332 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
3333
3334 // All x86 ABIs require that for returning structs by value we copy
3335 // the sret argument into %rax/%eax (depending on ABI) for the return.
3336 // We saved the argument into a virtual register in the entry block,
3337 // so now we copy the value out and into %rax/%eax.
3338 //
3339 // Checking Function.hasStructRetAttr() here is insufficient because the IR
3340 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
3341 // false, then an sret argument may be implicitly inserted in the SelDAG. In
3342 // either case FuncInfo->setSRetReturnReg() will have been called.
3343 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3344 // When we have both sret and another return value, we should use the
3345 // original Chain stored in RetOps[0], instead of the current Chain updated
3346 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
3347
3348 // For the case of sret and another return value, we have
3349 // Chain_0 at the function entry
3350 // Chain_1 = getCopyToReg(Chain_0) in the above loop
3351 // If we use Chain_1 in getCopyFromReg, we will have
3352 // Val = getCopyFromReg(Chain_1)
3353 // Chain_2 = getCopyToReg(Chain_1, Val) from below
3354
3355 // getCopyToReg(Chain_0) will be glued together with
3356 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
3357 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
3358 // Data dependency from Unit B to Unit A due to usage of Val in
3359 // getCopyToReg(Chain_1, Val)
3360 // Chain dependency from Unit A to Unit B
3361
3362 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
3363 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
3364 getPointerTy(MF.getDataLayout()));
3365
3366 Register RetValReg
3367 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
3368 X86::RAX : X86::EAX;
3369 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
3370 Glue = Chain.getValue(1);
3371
3372 // RAX/EAX now acts like a return value.
3373 RetOps.push_back(
3374 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
3375
3376 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
3377 // this however for preserve_most/preserve_all to minimize the number of
3378 // callee-saved registers for these CCs.
3379 if (ShouldDisableCalleeSavedRegister &&
3380 CallConv != CallingConv::PreserveAll &&
3381 CallConv != CallingConv::PreserveMost)
3382 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
3383 }
3384
3385 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3386 const MCPhysReg *I =
3387 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3388 if (I) {
3389 for (; *I; ++I) {
3390 if (X86::GR64RegClass.contains(*I))
3391 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3392 else
3393 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3393)
;
3394 }
3395 }
3396
3397 RetOps[0] = Chain; // Update chain.
3398
3399 // Add the glue if we have it.
3400 if (Glue.getNode())
3401 RetOps.push_back(Glue);
3402
3403 X86ISD::NodeType opcode = X86ISD::RET_GLUE;
3404 if (CallConv == CallingConv::X86_INTR)
3405 opcode = X86ISD::IRET;
3406 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
3407}
3408
3409bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3410 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
3411 return false;
3412
3413 SDValue TCChain = Chain;
3414 SDNode *Copy = *N->use_begin();
3415 if (Copy->getOpcode() == ISD::CopyToReg) {
3416 // If the copy has a glue operand, we conservatively assume it isn't safe to
3417 // perform a tail call.
3418 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3419 return false;
3420 TCChain = Copy->getOperand(0);
3421 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
3422 return false;
3423
3424 bool HasRet = false;
3425 for (const SDNode *U : Copy->uses()) {
3426 if (U->getOpcode() != X86ISD::RET_GLUE)
3427 return false;
3428 // If we are returning more than one value, we can definitely
3429 // not make a tail call see PR19530
3430 if (U->getNumOperands() > 4)
3431 return false;
3432 if (U->getNumOperands() == 4 &&
3433 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
3434 return false;
3435 HasRet = true;
3436 }
3437
3438 if (!HasRet)
3439 return false;
3440
3441 Chain = TCChain;
3442 return true;
3443}
3444
3445EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
3446 ISD::NodeType ExtendKind) const {
3447 MVT ReturnMVT = MVT::i32;
3448
3449 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
3450 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
3451 // The ABI does not require i1, i8 or i16 to be extended.
3452 //
3453 // On Darwin, there is code in the wild relying on Clang's old behaviour of
3454 // always extending i8/i16 return values, so keep doing that for now.
3455 // (PR26665).
3456 ReturnMVT = MVT::i8;
3457 }
3458
3459 EVT MinVT = getRegisterType(Context, ReturnMVT);
3460 return VT.bitsLT(MinVT) ? MinVT : VT;
3461}
3462
3463/// Reads two 32 bit registers and creates a 64 bit mask value.
3464/// \param VA The current 32 bit value that need to be assigned.
3465/// \param NextVA The next 32 bit value that need to be assigned.
3466/// \param Root The parent DAG node.
3467/// \param [in,out] InGlue Represents SDvalue in the parent DAG node for
3468/// glue purposes. In the case the DAG is already using
3469/// physical register instead of virtual, we should glue
3470/// our new SDValue to InGlue SDvalue.
3471/// \return a new SDvalue of size 64bit.
3472static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
3473 SDValue &Root, SelectionDAG &DAG,
3474 const SDLoc &Dl, const X86Subtarget &Subtarget,
3475 SDValue *InGlue = nullptr) {
3476 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(static_cast <bool> ((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3476, __extension__
__PRETTY_FUNCTION__))
;
3477 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3477, __extension__
__PRETTY_FUNCTION__))
;
3478 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3479, __extension__
__PRETTY_FUNCTION__))
3479 "Expecting first location of 64 bit width type")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3479, __extension__
__PRETTY_FUNCTION__))
;
3480 assert(NextVA.getValVT() == VA.getValVT() &&(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3481, __extension__
__PRETTY_FUNCTION__))
3481 "The locations should have the same type")(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3481, __extension__
__PRETTY_FUNCTION__))
;
3482 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3483, __extension__
__PRETTY_FUNCTION__))
3483 "The values should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3483, __extension__
__PRETTY_FUNCTION__))
;
3484
3485 SDValue Lo, Hi;
3486 SDValue ArgValueLo, ArgValueHi;
3487
3488 MachineFunction &MF = DAG.getMachineFunction();
3489 const TargetRegisterClass *RC = &X86::GR32RegClass;
3490
3491 // Read a 32 bit value from the registers.
3492 if (nullptr == InGlue) {
3493 // When no physical register is present,
3494 // create an intermediate virtual register.
3495 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3496 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3497 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3498 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3499 } else {
3500 // When a physical register is available read the value from it and glue
3501 // the reads together.
3502 ArgValueLo =
3503 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InGlue);
3504 *InGlue = ArgValueLo.getValue(2);
3505 ArgValueHi =
3506 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InGlue);
3507 *InGlue = ArgValueHi.getValue(2);
3508 }
3509
3510 // Convert the i32 type into v32i1 type.
3511 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
3512
3513 // Convert the i32 type into v32i1 type.
3514 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
3515
3516 // Concatenate the two values together.
3517 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
3518}
3519
3520/// The function will lower a register of various sizes (8/16/32/64)
3521/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
3522/// \returns a DAG node contains the operand after lowering to mask type.
3523static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
3524 const EVT &ValLoc, const SDLoc &Dl,
3525 SelectionDAG &DAG) {
3526 SDValue ValReturned = ValArg;
3527
3528 if (ValVT == MVT::v1i1)
3529 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3530
3531 if (ValVT == MVT::v64i1) {
3532 // In 32 bit machine, this case is handled by getv64i1Argument
3533 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")(static_cast <bool> (ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? void (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3533, __extension__
__PRETTY_FUNCTION__))
;
3534 // In 64 bit machine, There is no need to truncate the value only bitcast
3535 } else {
3536 MVT maskLen;
3537 switch (ValVT.getSimpleVT().SimpleTy) {
3538 case MVT::v8i1:
3539 maskLen = MVT::i8;
3540 break;
3541 case MVT::v16i1:
3542 maskLen = MVT::i16;
3543 break;
3544 case MVT::v32i1:
3545 maskLen = MVT::i32;
3546 break;
3547 default:
3548 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3548)
;
3549 }
3550
3551 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3552 }
3553 return DAG.getBitcast(ValVT, ValReturned);
3554}
3555
3556/// Lower the result values of a call into the
3557/// appropriate copies out of appropriate physical registers.
3558///
3559SDValue X86TargetLowering::LowerCallResult(
3560 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
3561 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3562 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3563 uint32_t *RegMask) const {
3564
3565 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3566 // Assign locations to each value returned by this call.
3567 SmallVector<CCValAssign, 16> RVLocs;
3568 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3569 *DAG.getContext());
3570 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3571
3572 // Copy all of the result registers out of their specified physreg.
3573 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3574 ++I, ++InsIndex) {
3575 CCValAssign &VA = RVLocs[I];
3576 EVT CopyVT = VA.getLocVT();
3577
3578 // In some calling conventions we need to remove the used registers
3579 // from the register mask.
3580 if (RegMask) {
3581 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3582 SubRegs.isValid(); ++SubRegs)
3583 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3584 }
3585
3586 // Report an error if there was an attempt to return FP values via XMM
3587 // registers.
3588 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3589 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3590 if (VA.getLocReg() == X86::XMM1)
3591 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3592 else
3593 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3594 } else if (!Subtarget.hasSSE2() &&
3595 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3596 CopyVT == MVT::f64) {
3597 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3598 if (VA.getLocReg() == X86::XMM1)
3599 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3600 else
3601 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3602 }
3603
3604 // If we prefer to use the value in xmm registers, copy it out as f80 and
3605 // use a truncate to move it from fp stack reg to xmm reg.
3606 bool RoundAfterCopy = false;
3607 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3608 isScalarFPTypeInSSEReg(VA.getValVT())) {
3609 if (!Subtarget.hasX87())
3610 report_fatal_error("X87 register return with X87 disabled");
3611 CopyVT = MVT::f80;
3612 RoundAfterCopy = (CopyVT != VA.getLocVT());
3613 }
3614
3615 SDValue Val;
3616 if (VA.needsCustom()) {
3617 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3618, __extension__
__PRETTY_FUNCTION__))
3618 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3618, __extension__
__PRETTY_FUNCTION__))
;
3619 Val =
3620 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
3621 } else {
3622 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
3623 .getValue(1);
3624 Val = Chain.getValue(0);
3625 InGlue = Chain.getValue(2);
3626 }
3627
3628 if (RoundAfterCopy)
3629 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3630 // This truncation won't change the value.
3631 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
3632
3633 if (VA.isExtInLoc()) {
3634 if (VA.getValVT().isVector() &&
3635 VA.getValVT().getScalarType() == MVT::i1 &&
3636 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3637 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3638 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3639 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3640 } else
3641 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3642 }
3643
3644 if (VA.getLocInfo() == CCValAssign::BCvt)
3645 Val = DAG.getBitcast(VA.getValVT(), Val);
3646
3647 InVals.push_back(Val);
3648 }
3649
3650 return Chain;
3651}
3652
3653//===----------------------------------------------------------------------===//
3654// C & StdCall & Fast Calling Convention implementation
3655//===----------------------------------------------------------------------===//
3656// The StdCall calling convention is the standard for many Windows API
3657// routines. It differs from the C calling convention only slightly: the
3658// callee cleans up the stack, not the caller. Symbols are also decorated
3659// (name-mangled) in a convention-specific way. It doesn't support any vector
3660// arguments. For info on the fast calling convention see Fast Calling
3661// Convention (tail call) implementation LowerX86_32FastCCCallTo.
3662
3663/// Determines whether Args, either a set of outgoing arguments to a call, or a
3664/// set of incoming args of a call, contains an sret pointer that the callee
3665/// pops
3666template <typename T>
3667static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
3668 const X86Subtarget &Subtarget) {
3669 // Not C++20 (yet), so no concepts available.
3670 static_assert(std::is_same_v<T, ISD::OutputArg> ||
3671 std::is_same_v<T, ISD::InputArg>,
3672 "requires ISD::OutputArg or ISD::InputArg");
3673
3674 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
3675 // for most compilations.
3676 if (!Subtarget.is32Bit())
3677 return false;
3678
3679 if (Args.empty())
3680 return false;
3681
3682 // Most calls do not have an sret argument, check the arg next.
3683 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
3684 if (!Flags.isSRet() || Flags.isInReg())
3685 return false;
3686
3687 // The MSVCabi does not pop the sret.
3688 if (Subtarget.getTargetTriple().isOSMSVCRT())
3689 return false;
3690
3691 // MCUs don't pop the sret
3692 if (Subtarget.isTargetMCU())
3693 return false;
3694
3695 // Callee pops argument
3696 return true;
3697}
3698
3699/// Make a copy of an aggregate at address specified by "Src" to address
3700/// "Dst" with size and alignment information specified by the specific
3701/// parameter attribute. The copy will be passed as a byval function parameter.
3702static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3703 SDValue Chain, ISD::ArgFlagsTy Flags,
3704 SelectionDAG &DAG, const SDLoc &dl) {
3705 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3706
3707 return DAG.getMemcpy(
3708 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3709 /*isVolatile*/ false, /*AlwaysInline=*/true,
3710 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3711}
3712
3713/// Return true if the calling convention is one that we can guarantee TCO for.
3714static bool canGuaranteeTCO(CallingConv::ID CC) {
3715 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3716 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3717 CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
3718}
3719
3720/// Return true if we might ever do TCO for calls with this calling convention.
3721static bool mayTailCallThisCC(CallingConv::ID CC) {
3722 switch (CC) {
3723 // C calling conventions:
3724 case CallingConv::C:
3725 case CallingConv::Win64:
3726 case CallingConv::X86_64_SysV:
3727 // Callee pop conventions:
3728 case CallingConv::X86_ThisCall:
3729 case CallingConv::X86_StdCall:
3730 case CallingConv::X86_VectorCall:
3731 case CallingConv::X86_FastCall:
3732 // Swift:
3733 case CallingConv::Swift:
3734 return true;
3735 default:
3736 return canGuaranteeTCO(CC);
3737 }
3738}
3739
3740/// Return true if the function is being made into a tailcall target by
3741/// changing its ABI.
3742static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3743 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
3744 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
3745}
3746
3747bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3748 if (!CI->isTailCall())
3749 return false;
3750
3751 CallingConv::ID CalleeCC = CI->getCallingConv();
3752 if (!mayTailCallThisCC(CalleeCC))
3753 return false;
3754
3755 return true;
3756}
3757
/// Lower one incoming argument that lives in a stack slot.
///
/// \param Chain    Chain the generated loads are attached to.
/// \param CallConv Calling convention of the function being lowered.
/// \param Ins      Descriptions of all incoming arguments.
/// \param dl       Debug location for the created nodes.
/// \param DAG      The selection DAG being built.
/// \param VA       Location assigned to this argument; its memory offset is
///                 read via getLocMemOffset(), so it is expected to be a
///                 memory location.
/// \param MFI      Frame info in which fixed stack objects are created.
/// \param i        Index of this argument in \p Ins.
/// \returns for byval arguments, a frame index node; otherwise a load from a
///          fixed stack object, wrapped in SCALAR_TO_VECTOR or TRUNCATE when
///          an i1-based value was extended in memory.
3758SDValue
3759X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3760 const SmallVectorImpl<ISD::InputArg> &Ins,
3761 const SDLoc &dl, SelectionDAG &DAG,
3762 const CCValAssign &VA,
3763 MachineFrameInfo &MFI, unsigned i) const {
3764 // Create the nodes corresponding to a load from this parameter slot.
3765 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3766 bool AlwaysUseMutable = shouldGuaranteeTCO(
3767 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
     // The slot is immutable only when TCO is not guaranteed and the argument
     // is not byval.
3768 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3769 EVT ValVT;
3770 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3771
3772 // If value is passed by pointer we have address passed instead of the value
3773 // itself. No need to extend if the mask value and location share the same
3774 // absolute size.
3775 bool ExtendedInMem =
3776 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3777 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3778
     // Load with the location type when the slot holds a pointer or an
     // extended mask; otherwise load the value type directly.
3779 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3780 ValVT = VA.getLocVT();
3781 else
3782 ValVT = VA.getValVT();
3783
3784 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3785 // changed with more analysis.
3786 // In case of tail call optimization mark all arguments mutable. Since they
3787 // could be overwritten by lowering of arguments in case of a tail call.
3788 if (Flags.isByVal()) {
3789 unsigned Bytes = Flags.getByValSize();
3790 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3791
3792 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3793 // can be improved with deeper analysis.
3794 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3795 /*isAliased=*/true);
     // byval arguments are returned as the address of the object, not a load.
3796 return DAG.getFrameIndex(FI, PtrVT);
3797 }
3798
3799 EVT ArgVT = Ins[i].ArgVT;
3800
3801 // If this is a vector that has been split into multiple parts, and the
3802 // scalar size of the parts don't match the vector element size, then we can't
3803 // elide the copy. The parts will have padding between them instead of being
3804 // packed like a vector.
3805 bool ScalarizedAndExtendedVector =
3806 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3807 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3808
3809 // This is an argument in memory. We might be able to perform copy elision.
3810 // If the argument is passed directly in memory without any extension, then we
3811 // can perform copy elision. Large vector types, for example, may be passed
3812 // indirectly by pointer.
3813 if (Flags.isCopyElisionCandidate() &&
3814 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3815 !ScalarizedAndExtendedVector) {
3816 SDValue PartAddr;
3817 if (Ins[i].PartOffset == 0) {
3818 // If this is a one-part value or the first part of a multi-part value,
3819 // create a stack object for the entire argument value type and return a
3820 // load from our portion of it. This assumes that if the first part of an
3821 // argument is in memory, the rest will also be in memory.
3822 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3823 /*IsImmutable=*/false);
3824 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3825 return DAG.getLoad(
3826 ValVT, dl, Chain, PartAddr,
3827 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3828 } else {
3829 // This is not the first piece of an argument in memory. See if there is
3830 // already a fixed stack object including this offset. If so, assume it
3831 // was created by the PartOffset == 0 branch above and create a load from
3832 // the appropriate offset into it.
3833 int64_t PartBegin = VA.getLocMemOffset();
3834 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3835 int FI = MFI.getObjectIndexBegin();
     // Scan the fixed objects for one whose byte range fully contains
     // [PartBegin, PartEnd).
3836 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3837 int64_t ObjBegin = MFI.getObjectOffset(FI);
3838 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3839 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3840 break;
3841 }
     // FI is still a fixed-object index only if the scan above stopped early,
     // i.e. a containing object was found. Otherwise fall through to the
     // generic path below.
3842 if (MFI.isFixedObjectIndex(FI)) {
3843 SDValue Addr =
3844 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3845 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3846 return DAG.getLoad(
3847 ValVT, dl, Chain, Addr,
3848 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3849 Ins[i].PartOffset));
3850 }
3851 }
3852 }
3853
     // Generic path: a dedicated fixed object sized to the loaded type.
3854 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3855 VA.getLocMemOffset(), isImmutable);
3856
3857 // Set SExt or ZExt flag.
3858 if (VA.getLocInfo() == CCValAssign::ZExt) {
3859 MFI.setObjectZExt(FI, true);
3860 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3861 MFI.setObjectSExt(FI, true);
3862 }
3863
     // On 32-bit Windows MSVC targets the load is given an explicit 4-byte
     // alignment, except for f80; elsewhere the alignment is left unset.
3864 MaybeAlign Alignment;
3865 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
3866 ValVT != MVT::f80)
3867 Alignment = MaybeAlign(4);
3868 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3869 SDValue Val = DAG.getLoad(
3870 ValVT, dl, Chain, FIN,
3871 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3872 Alignment);
     // A value that was extended in memory is converted back to its value
     // type: SCALAR_TO_VECTOR for vector types, TRUNCATE for scalars.
3873 return ExtendedInMem
3874 ? (VA.getValVT().isVector()
3875 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3876 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3877 : Val;
3878}
3879
3880// FIXME: Get this from tablegen.
3881static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3882 const X86Subtarget &Subtarget) {
3883 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3883, __extension__ __PRETTY_FUNCTION__))
;
3884
3885 if (Subtarget.isCallingConvWin64(CallConv)) {
3886 static const MCPhysReg GPR64ArgRegsWin64[] = {
3887 X86::RCX, X86::RDX, X86::R8, X86::R9
3888 };
3889 return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3890 }
3891
3892 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3893 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3894 };
3895 return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3896}
3897
3898// FIXME: Get this from tablegen.
3899static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3900 CallingConv::ID CallConv,
3901 const X86Subtarget &Subtarget) {
3902 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3902, __extension__ __PRETTY_FUNCTION__))
;
3903 if (Subtarget.isCallingConvWin64(CallConv)) {
3904 // The XMM registers which might contain var arg parameters are shadowed
3905 // in their paired GPR. So we only need to save the GPR to their home
3906 // slots.
3907 // TODO: __vectorcall will change this.
3908 return std::nullopt;
3909 }
3910
3911 bool isSoftFloat = Subtarget.useSoftFloat();
3912 if (isSoftFloat || !Subtarget.hasSSE1())
3913 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3914 // registers.
3915 return std::nullopt;
3916
3917 static const MCPhysReg XMMArgRegs64Bit[] = {
3918 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3919 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3920 };
3921 return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3922}
3923
3924#ifndef NDEBUG
3925static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3926 return llvm::is_sorted(
3927 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3928 return A.getValNo() < B.getValNo();
3929 });
3930}
3931#endif
3932
3933namespace {
3934/// This is a helper class for lowering variable arguments parameters.
/// It bundles the per-function objects the lowering needs (machine function,
/// frame info, frame lowering, target lowering, calling-convention state) so
/// they can be shared between the private steps below.
3935class VarArgsLoweringHelper {
3936public:
     /// \param FuncInfo  X86 function info that receives the vararg frame
     ///                  indices, offsets and forwarded registers.
     /// \param Loc       Debug location used for created nodes; stored by
     ///                  reference, so it must outlive this helper.
     /// \param DAG       The selection DAG being built; the machine function,
     ///                  IR function, frame info and target lowering are all
     ///                  derived from it.
     /// \param Subtarget Subtarget queried for target features and frame
     ///                  lowering.
     /// \param CallConv  Calling convention of the function being lowered.
     /// \param CCInfo    Calling-convention state holding the already
     ///                  allocated argument registers.
3937 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3938 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3939 CallingConv::ID CallConv, CCState &CCInfo)
3940 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3941 TheMachineFunction(DAG.getMachineFunction()),
3942 TheFunction(TheMachineFunction.getFunction()),
3943 FrameInfo(TheMachineFunction.getFrameInfo()),
3944 FrameLowering(*Subtarget.getFrameLowering()),
3945 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3946 CCInfo(CCInfo) {}
3947
3948 // Lower variable arguments parameters.
     // Chain is updated in place; StackSize is the offset at which the vararg
     // area is created.
3949 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3950
3951private:
     // Creates the vararg frame object and, on 64-bit targets, the register
     // save area together with the stores that fill it.
3952 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3953
     // Copies the registers that must be forwarded to a musttail call into
     // virtual registers.
3954 void forwardMustTailParameters(SDValue &Chain);
3955
3956 bool is64Bit() const { return Subtarget.is64Bit(); }
3957 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3958
     // NOTE: members are initialized in declaration order. TheFunction and
     // FrameInfo are derived from TheMachineFunction, so it must stay
     // declared before them.
3959 X86MachineFunctionInfo *FuncInfo;
3960 const SDLoc &DL;
3961 SelectionDAG &DAG;
3962 const X86Subtarget &Subtarget;
3963 MachineFunction &TheMachineFunction;
3964 const Function &TheFunction;
3965 MachineFrameInfo &FrameInfo;
3966 const TargetFrameLowering &FrameLowering;
3967 const TargetLowering &TargLowering;
3968 CallingConv::ID CallConv;
3969 CCState &CCInfo;
3970};
3971} // namespace
3972
/// Create the frame objects backing va_start and, on 64-bit targets, store the
/// not-yet-allocated argument registers into the register save area.
///
/// \param Chain     In/out chain; replaced by a TokenFactor of the emitted
///                  stores when any were created.
/// \param StackSize Offset at which the vararg frame object is created.
3973void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3974 SDValue &Chain, unsigned StackSize) {
3975 // If the function takes variable number of arguments, make a frame index for
3976 // the start of the first vararg value... for expansion of llvm.va_start. We
3977 // can skip this if there are no va_start calls.
     // The object is skipped only for 32-bit X86_FastCall / X86_ThisCall.
3978 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3979 CallConv != CallingConv::X86_ThisCall)) {
3980 FuncInfo->setVarArgsFrameIndex(
3981 FrameInfo.CreateFixedObject(1, StackSize, true));
3982 }
3983
3984 // 64-bit calling conventions support varargs and register parameters, so we
3985 // have to do extra work to spill them in the prologue.
3986 if (is64Bit()) {
3987 // Find the first unallocated argument registers.
3988 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3989 ArrayRef<MCPhysReg> ArgXMMs =
3990 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3991 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3992 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3993
3994 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3995, __extension__
__PRETTY_FUNCTION__))
3995 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3995, __extension__
__PRETTY_FUNCTION__))
;
3996
3997 if (isWin64()) {
3998 // Get to the caller-allocated home save location. Add 8 to account
3999 // for the return address.
4000 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
4001 FuncInfo->setRegSaveFrameIndex(
4002 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
4003 // Fixup to set vararg frame on shadow area (4 x i64).
4004 if (NumIntRegs < 4)
4005 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
4006 } else {
4007 // For X86-64, if there are vararg parameters that are passed via
4008 // registers, then we must store them to their spots on the stack so
4009 // they may be loaded by dereferencing the result of va_next.
     // The save area holds all argument GPRs (8 bytes each) followed by all
     // argument XMM registers (16 bytes each).
4010 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
4011 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
4012 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
4013 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
4014 }
4015
4016 SmallVector<SDValue, 6>
4017 LiveGPRs; // list of SDValue for GPR registers keeping live input value
4018 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
4019 // keeping live input value
4020 SDValue ALVal; // if applicable keeps SDValue for %al register
4021
4022 // Gather all the live in physical registers.
4023 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
4024 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
4025 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
4026 }
4027 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
4028 if (!AvailableXmms.empty()) {
     // %al is read as well and passed to VASTART_SAVE_XMM_REGS below.
4029 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
4030 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
4031 for (MCPhysReg Reg : AvailableXmms) {
4032 // FastRegisterAllocator spills virtual registers at basic
4033 // block boundary. That leads to usages of xmm registers
4034 // outside of check for %al. Pass physical registers to
4035 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
4036 TheMachineFunction.getRegInfo().addLiveIn(Reg);
4037 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
4038 }
4039 }
4040
4041 // Store the integer parameter registers.
4042 SmallVector<SDValue, 8> MemOps;
4043 SDValue RSFIN =
4044 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
4045 TargLowering.getPointerTy(DAG.getDataLayout()));
     // NOTE(review): the GP offset is only set in the non-Win64 branch above;
     // for Win64 this reads whatever value FuncInfo already holds (presumably
     // its default) -- confirm.
4046 unsigned Offset = FuncInfo->getVarArgsGPOffset();
4047 for (SDValue Val : LiveGPRs) {
4048 SDValue FIN = DAG.getNode(ISD::ADD, DL,
4049 TargLowering.getPointerTy(DAG.getDataLayout()),
4050 RSFIN, DAG.getIntPtrConstant(Offset, DL));
     // Each store is chained on the chain result (value 1) of its own
     // CopyFromReg.
4051 SDValue Store =
4052 DAG.getStore(Val.getValue(1), DL, Val, FIN,
4053 MachinePointerInfo::getFixedStack(
4054 DAG.getMachineFunction(),
4055 FuncInfo->getRegSaveFrameIndex(), Offset));
4056 MemOps.push_back(Store);
4057 Offset += 8;
4058 }
4059
4060 // Now store the XMM (fp + vector) parameter registers.
4061 if (!LiveXMMRegs.empty()) {
     // Operands: chain, %al value, save-area address, FP offset, then the
     // XMM registers themselves.
4062 SmallVector<SDValue, 12> SaveXMMOps;
4063 SaveXMMOps.push_back(Chain);
4064 SaveXMMOps.push_back(ALVal);
4065 SaveXMMOps.push_back(RSFIN);
4066 SaveXMMOps.push_back(
4067 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
4068 llvm::append_range(SaveXMMOps, LiveXMMRegs);
4069 MachineMemOperand *StoreMMO =
4070 DAG.getMachineFunction().getMachineMemOperand(
4071 MachinePointerInfo::getFixedStack(
4072 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
4073 Offset),
4074 MachineMemOperand::MOStore, 128, Align(16));
4075 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
4076 DL, DAG.getVTList(MVT::Other),
4077 SaveXMMOps, MVT::i8, StoreMMO));
4078 }
4079
     // Merge all stores into a single chain, if any were emitted.
4080 if (!MemOps.empty())
4081 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4082 }
4083}
4084
4085void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
4086 // Find the largest legal vector type.
4087 MVT VecVT = MVT::Other;
4088 // FIXME: Only some x86_32 calling conventions support AVX512.
4089 if (Subtarget.useAVX512Regs() &&
4090 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
4091 CallConv == CallingConv::Intel_OCL_BI)))
4092 VecVT = MVT::v16f32;
4093 else if (Subtarget.hasAVX())
4094 VecVT = MVT::v8f32;
4095 else if (Subtarget.hasSSE2())
4096 VecVT = MVT::v4f32;
4097
4098 // We forward some GPRs and some vector types.
4099 SmallVector<MVT, 2> RegParmTypes;
4100 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
4101 RegParmTypes.push_back(IntVT);
4102 if (VecVT != MVT::Other)
4103 RegParmTypes.push_back(VecVT);
4104
4105 // Compute the set of forwarded registers. The rest are scratch.
4106 SmallVectorImpl<ForwardedRegister> &Forwards =
4107 FuncInfo->getForwardedMustTailRegParms();
4108 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
4109
4110 // Forward AL for SysV x86_64 targets, since it is used for varargs.
4111 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
4112 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
4113 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
4114 }
4115
4116 // Copy all forwards from physical to virtual registers.
4117 for (ForwardedRegister &FR : Forwards) {
4118 // FIXME: Can we use a less constrained schedule?
4119 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
4120 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
4121 TargLowering.getRegClassFor(FR.VT));
4122 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
4123 }
4124}
4125
4126void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
4127 unsigned StackSize) {
4128 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
4129 // If necessary, it would be set into the correct value later.
4130 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
4131 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4132
4133 if (FrameInfo.hasVAStart())
4134 createVarArgAreaAndStoreRegisters(Chain, StackSize);
4135
4136 if (FrameInfo.hasMustTailInVarArgFunc())
4137 forwardMustTailParameters(Chain);
4138}
4139
4140SDValue X86TargetLowering::LowerFormalArguments(
4141 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4142 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4143 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4144 MachineFunction &MF = DAG.getMachineFunction();
4145 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4146
4147 const Function &F = MF.getFunction();
4148 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
4149 F.getName() == "main")
4150 FuncInfo->setForceFramePointer(true);
4151
4152 MachineFrameInfo &MFI = MF.getFrameInfo();
4153 bool Is64Bit = Subtarget.is64Bit();
4154 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4155
4156 assert((static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4158, __extension__
__PRETTY_FUNCTION__))
4157 !(IsVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4158, __extension__
__PRETTY_FUNCTION__))
4158 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")(static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4158, __extension__
__PRETTY_FUNCTION__))
;
4159
4160 // Assign locations to all of the incoming arguments.
4161 SmallVector<CCValAssign, 16> ArgLocs;
4162 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4163
4164 // Allocate shadow area for Win64.
4165 if (IsWin64)
4166 CCInfo.AllocateStack(32, Align(8));
4167
4168 CCInfo.AnalyzeArguments(Ins, CC_X86);
4169
4170 // In vectorcall calling convention a second pass is required for the HVA
4171 // types.
4172 if (CallingConv::X86_VectorCall == CallConv) {
4173 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
4174 }
4175
4176 // The next loop assumes that the locations are in the same order of the
4177 // input arguments.
4178 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4179, __extension__
__PRETTY_FUNCTION__))
4179 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4179, __extension__
__PRETTY_FUNCTION__))
;
4180
4181 SDValue ArgValue;
4182 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
4183 ++I, ++InsIndex) {
4184 assert(InsIndex < Ins.size() && "Invalid Ins index")(static_cast <bool> (InsIndex < Ins.size() &&
"Invalid Ins index") ? void (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4184, __extension__
__PRETTY_FUNCTION__))
;
4185 CCValAssign &VA = ArgLocs[I];
4186
4187 if (VA.isRegLoc()) {
4188 EVT RegVT = VA.getLocVT();
4189 if (VA.needsCustom()) {
4190 assert((static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4192, __extension__
__PRETTY_FUNCTION__))
4191 VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4192, __extension__
__PRETTY_FUNCTION__))
4192 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4192, __extension__
__PRETTY_FUNCTION__))
;
4193
4194 // v64i1 values, in regcall calling convention, that are
4195 // compiled to 32 bit arch, are split up into two registers.
4196 ArgValue =
4197 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
4198 } else {
4199 const TargetRegisterClass *RC;
4200 if (RegVT == MVT::i8)
4201 RC = &X86::GR8RegClass;
4202 else if (RegVT == MVT::i16)
4203 RC = &X86::GR16RegClass;
4204 else if (RegVT == MVT::i32)
4205 RC = &X86::GR32RegClass;
4206 else if (Is64Bit && RegVT == MVT::i64)
4207 RC = &X86::GR64RegClass;
4208 else if (RegVT == MVT::f16)
4209 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
4210 else if (RegVT == MVT::f32)
4211 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
4212 else if (RegVT == MVT::f64)
4213 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
4214 else if (RegVT == MVT::f80)
4215 RC = &X86::RFP80RegClass;
4216 else if (RegVT == MVT::f128)
4217 RC = &X86::VR128RegClass;
4218 else if (RegVT.is512BitVector())
4219 RC = &X86::VR512RegClass;
4220 else if (RegVT.is256BitVector())
4221 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
4222 else if (RegVT.is128BitVector())
4223 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
4224 else if (RegVT == MVT::x86mmx)
4225 RC = &X86::VR64RegClass;
4226 else if (RegVT == MVT::v1i1)
4227 RC = &X86::VK1RegClass;
4228 else if (RegVT == MVT::v8i1)
4229 RC = &X86::VK8RegClass;
4230 else if (RegVT == MVT::v16i1)
4231 RC = &X86::VK16RegClass;
4232 else if (RegVT == MVT::v32i1)
4233 RC = &X86::VK32RegClass;
4234 else if (RegVT == MVT::v64i1)
4235 RC = &X86::VK64RegClass;
4236 else
4237 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4237)
;
4238
4239 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4240 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4241 }
4242
4243 // If this is an 8 or 16-bit value, it is really passed promoted to 32
4244 // bits. Insert an assert[sz]ext to capture this, then truncate to the
4245 // right size.
4246 if (VA.getLocInfo() == CCValAssign::SExt)
4247 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4248 DAG.getValueType(VA.getValVT()));
4249 else if (VA.getLocInfo() == CCValAssign::ZExt)
4250 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4251 DAG.getValueType(VA.getValVT()));
4252 else if (VA.getLocInfo() == CCValAssign::BCvt)
4253 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
4254
4255 if (VA.isExtInLoc()) {
4256 // Handle MMX values passed in XMM regs.
4257 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
4258 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
4259 else if (VA.getValVT().isVector() &&
4260 VA.getValVT().getScalarType() == MVT::i1 &&
4261 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
4262 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
4263 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
4264 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
4265 } else
4266 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4267 }
4268 } else {
4269 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4269, __extension__ __PRETTY_FUNCTION__))
;
4270 ArgValue =
4271 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
4272 }
4273
4274 // If value is passed via pointer - do a load.
4275 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
4276 ArgValue =
4277 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
4278
4279 InVals.push_back(ArgValue);
4280 }
4281
4282 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
4283 if (Ins[I].Flags.isSwiftAsync()) {
4284 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
4285 if (Subtarget.is64Bit())
4286 X86FI->setHasSwiftAsyncContext(true);
4287 else {
4288 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
4289 X86FI->setSwiftAsyncContextFrameIdx(FI);
4290 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
4291 DAG.getFrameIndex(FI, MVT::i32),
4292 MachinePointerInfo::getFixedStack(MF, FI));
4293 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
4294 }
4295 }
4296
4297 // Swift calling convention does not require we copy the sret argument
4298 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
4299 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
4300 continue;
4301
4302 // All x86 ABIs require that for returning structs by value we copy the
4303 // sret argument into %rax/%eax (depending on ABI) for the return. Save
4304 // the argument into a virtual register so that we can access it from the
4305 // return points.
4306 if (Ins[I].Flags.isSRet()) {
4307 assert(!FuncInfo->getSRetReturnReg() &&(static_cast <bool> (!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set") ? void (0) : __assert_fail
("!FuncInfo->getSRetReturnReg() && \"SRet return has already been set\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4308, __extension__
__PRETTY_FUNCTION__))
4308 "SRet return has already been set")(static_cast <bool> (!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set") ? void (0) : __assert_fail
("!FuncInfo->getSRetReturnReg() && \"SRet return has already been set\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4308, __extension__
__PRETTY_FUNCTION__))
;
4309 MVT PtrTy = getPointerTy(DAG.getDataLayout());
4310 Register Reg =
4311 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
4312 FuncInfo->setSRetReturnReg(Reg);
4313 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
4314 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
4315 break;
4316 }
4317 }
4318
4319 unsigned StackSize = CCInfo.getNextStackOffset();
4320 // Align stack specially for tail calls.
4321 if (shouldGuaranteeTCO(CallConv,
4322 MF.getTarget().Options.GuaranteedTailCallOpt))
4323 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
4324
4325 if (IsVarArg)
4326 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
4327 .lowerVarArgsParameters(Chain, StackSize);
4328
4329 // Some CCs need callee pop.
4330 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
4331 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4332 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
4333 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
4334 // X86 interrupts must pop the error code (and the alignment padding) if
4335 // present.
4336 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
4337 } else {
4338 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
4339 // If this is an sret function, the return should pop the hidden pointer.
4340 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
4341 FuncInfo->setBytesToPopOnReturn(4);
4342 }
4343
4344 if (!Is64Bit) {
4345 // RegSaveFrameIndex is X86-64 only.
4346 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4347 }
4348
4349 FuncInfo->setArgumentStackSize(StackSize);
4350
4351 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
4352 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
4353 if (Personality == EHPersonality::CoreCLR) {
4354 assert(Is64Bit)(static_cast <bool> (Is64Bit) ? void (0) : __assert_fail
("Is64Bit", "llvm/lib/Target/X86/X86ISelLowering.cpp", 4354,
__extension__ __PRETTY_FUNCTION__))
;
4355 // TODO: Add a mechanism to frame lowering that will allow us to indicate
4356 // that we'd prefer this slot be allocated towards the bottom of the frame
4357 // (i.e. near the stack pointer after allocating the frame). Every
4358 // funclet needs a copy of this slot in its (mostly empty) frame, and the
4359 // offset from the bottom of this and each funclet's frame must be the
4360 // same, so the size of funclets' (mostly empty) frames is dictated by
4361 // how far this slot is from the bottom (since they allocate just enough
4362 // space to accommodate holding this slot at the correct offset).
4363 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
4364 EHInfo->PSPSymFrameIdx = PSPSymFI;
4365 }
4366 }
4367
4368 if (shouldDisableArgRegFromCSR(CallConv) ||
4369 F.hasFnAttribute("no_caller_saved_registers")) {
4370 MachineRegisterInfo &MRI = MF.getRegInfo();
4371 for (std::pair<Register, Register> Pair : MRI.liveins())
4372 MRI.disableCalleeSavedRegister(Pair.first);
4373 }
4374
4375 return Chain;
4376}
4377
4378SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
4379 SDValue Arg, const SDLoc &dl,
4380 SelectionDAG &DAG,
4381 const CCValAssign &VA,
4382 ISD::ArgFlagsTy Flags,
4383 bool isByVal) const {
4384 unsigned LocMemOffset = VA.getLocMemOffset();
4385 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4386 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4387 StackPtr, PtrOff);
4388 if (isByVal)
4389 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
4390
4391 MaybeAlign Alignment;
4392 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
4393 Arg.getSimpleValueType() != MVT::f80)
4394 Alignment = MaybeAlign(4);
4395 return DAG.getStore(
4396 Chain, dl, Arg, PtrOff,
4397 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
4398 Alignment);
4399}
4400
4401/// Emit a load of return address if tail call
4402/// optimization is performed and it is required.
4403SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
4404 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
4405 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
4406 // Adjust the Return address stack slot.
4407 EVT VT = getPointerTy(DAG.getDataLayout());
4408 OutRetAddr = getReturnAddressFrameIndex(DAG);
4409
4410 // Load the "old" Return address.
4411 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
4412 return SDValue(OutRetAddr.getNode(), 1);
4413}
4414
4415/// Emit a store of the return address if tail call
4416/// optimization is performed and it is required (FPDiff!=0).
4417static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
4418 SDValue Chain, SDValue RetAddrFrIdx,
4419 EVT PtrVT, unsigned SlotSize,
4420 int FPDiff, const SDLoc &dl) {
4421 // Store the return address to the appropriate stack slot.
4422 if (!FPDiff) return Chain;
4423 // Calculate the new stack slot for the return address.
4424 int NewReturnAddrFI =
4425 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
4426 false);
4427 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
4428 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
4429 MachinePointerInfo::getFixedStack(
4430 DAG.getMachineFunction(), NewReturnAddrFI));
4431 return Chain;
4432}
4433
4434/// Returns a vector_shuffle mask for an movs{s|d}, movd
4435/// operation of specified width.
4436static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
4437 SDValue V2) {
4438 unsigned NumElems = VT.getVectorNumElements();
4439 SmallVector<int, 8> Mask;
4440 Mask.push_back(NumElems);
4441 for (unsigned i = 1; i != NumElems; ++i)
4442 Mask.push_back(i);
4443 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
4444}
4445
4446SDValue
4447X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4448 SmallVectorImpl<SDValue> &InVals) const {
4449 SelectionDAG &DAG = CLI.DAG;
4450 SDLoc &dl = CLI.DL;
4451 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4452 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4453 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4454 SDValue Chain = CLI.Chain;
4455 SDValue Callee = CLI.Callee;
4456 CallingConv::ID CallConv = CLI.CallConv;
4457 bool &isTailCall = CLI.IsTailCall;
4458 bool isVarArg = CLI.IsVarArg;
4459 const auto *CB = CLI.CB;
4460
4461 MachineFunction &MF = DAG.getMachineFunction();
4462 bool Is64Bit = Subtarget.is64Bit();
4463 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4464 bool IsSibcall = false;
4465 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
4466 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
4467 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
4468 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
4469 bool HasNCSR = (CB && isa<CallInst>(CB) &&
4470 CB->hasFnAttr("no_caller_saved_registers"));
4471 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
4472 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
4473 bool IsCFICall = IsIndirectCall && CLI.CFIType;
4474 const Module *M = MF.getMMI().getModule();
4475 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
4476
4477 MachineFunction::CallSiteInfo CSInfo;
4478 if (CallConv == CallingConv::X86_INTR)
4479 report_fatal_error("X86 interrupts may not be called directly");
4480
4481 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
4482 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
4483 // If we are using a GOT, disable tail calls to external symbols with
4484 // default visibility. Tail calling such a symbol requires using a GOT
4485 // relocation, which forces early binding of the symbol. This breaks code
4486 // that require lazy function symbol resolution. Using musttail or
4487 // GuaranteedTailCallOpt will override this.
4488 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4489 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
4490 G->getGlobal()->hasDefaultVisibility()))
4491 isTailCall = false;
4492 }
4493
4494 if (isTailCall && !IsMustTail) {
4495 // Check if it's really possible to do a tail call.
4496 isTailCall = IsEligibleForTailCallOptimization(
4497 Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
4498 Ins, DAG);
4499
4500 // Sibcalls are automatically detected tailcalls which do not require
4501 // ABI changes.
4502 if (!IsGuaranteeTCO && isTailCall)
4503 IsSibcall = true;
4504
4505 if (isTailCall)
4506 ++NumTailCalls;
4507 }
4508
4509 if (IsMustTail && !isTailCall)
4510 report_fatal_error("failed to perform tail call elimination on a call "
4511 "site marked musttail");
4512
4513 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4514, __extension__
__PRETTY_FUNCTION__))
4514 "Var args not supported with calling convention fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4514, __extension__
__PRETTY_FUNCTION__))
;
4515
4516 // Analyze operands of the call, assigning locations to each operand.
4517 SmallVector<CCValAssign, 16> ArgLocs;
4518 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4519
4520 // Allocate shadow area for Win64.
4521 if (IsWin64)
4522 CCInfo.AllocateStack(32, Align(8));
4523
4524 CCInfo.AnalyzeArguments(Outs, CC_X86);
4525
4526 // In vectorcall calling convention a second pass is required for the HVA
4527 // types.
4528 if (CallingConv::X86_VectorCall == CallConv) {
4529 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
4530 }
4531
4532 // Get a count of how many bytes are to be pushed on the stack.
4533 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
4534 if (IsSibcall)
4535 // This is a sibcall. The memory operands are available in caller's
4536 // own caller's stack.
4537 NumBytes = 0;
4538 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
4539 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
4540
4541 int FPDiff = 0;
4542 if (isTailCall &&
4543 shouldGuaranteeTCO(CallConv,
4544 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4545 // Lower arguments at fp - stackoffset + fpdiff.
4546 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
4547
4548 FPDiff = NumBytesCallerPushed - NumBytes;
4549
4550 // Set the delta of movement of the returnaddr stackslot.
4551 // But only set if delta is greater than previous delta.
4552 if (FPDiff < X86Info->getTCReturnAddrDelta())
4553 X86Info->setTCReturnAddrDelta(FPDiff);
4554 }
4555
4556 unsigned NumBytesToPush = NumBytes;
4557 unsigned NumBytesToPop = NumBytes;
4558
4559 // If we have an inalloca argument, all stack space has already been allocated
4560 // for us and be right at the top of the stack. We don't support multiple
4561 // arguments passed in memory when using inalloca.
4562 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4563 NumBytesToPush = 0;
4564 if (!ArgLocs.back().isMemLoc())
4565 report_fatal_error("cannot use inalloca attribute on a register "
4566 "parameter");
4567 if (ArgLocs.back().getLocMemOffset() != 0)
4568 report_fatal_error("any parameter with the inalloca attribute must be "
4569 "the only memory argument");
4570 } else if (CLI.IsPreallocated) {
4571 assert(ArgLocs.back().isMemLoc() &&(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4573, __extension__
__PRETTY_FUNCTION__))
4572 "cannot use preallocated attribute on a register "(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4573, __extension__
__PRETTY_FUNCTION__))
4573 "parameter")(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4573, __extension__
__PRETTY_FUNCTION__))
;
4574 SmallVector<size_t, 4> PreallocatedOffsets;
4575 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4576 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4577 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4578 }
4579 }
4580 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4581 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4582 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4583 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4584 NumBytesToPush = 0;
4585 }
4586
4587 if (!IsSibcall && !IsMustTail)
4588 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4589 NumBytes - NumBytesToPush, dl);
4590
4591 SDValue RetAddrFrIdx;
4592 // Load return address for tail calls.
4593 if (isTailCall && FPDiff)
4594 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4595 Is64Bit, FPDiff, dl);
4596
4597 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4598 SmallVector<SDValue, 8> MemOpChains;
4599 SDValue StackPtr;
4600
4601 // The next loop assumes that the locations are in the same order of the
4602 // input arguments.
4603 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4604, __extension__
__PRETTY_FUNCTION__))
4604 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4604, __extension__
__PRETTY_FUNCTION__))
;
4605
4606 // Walk the register/memloc assignments, inserting copies/loads. In the case
4607 // of tail call optimization arguments are handle later.
4608 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4609 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4610 ++I, ++OutIndex) {
4611 assert(OutIndex < Outs.size() && "Invalid Out index")(static_cast <bool> (OutIndex < Outs.size() &&
"Invalid Out index") ? void (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4611, __extension__
__PRETTY_FUNCTION__))
;
4612 // Skip inalloca/preallocated arguments, they have already been written.
4613 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4614 if (Flags.isInAlloca() || Flags.isPreallocated())
4615 continue;
4616
4617 CCValAssign &VA = ArgLocs[I];
4618 EVT RegVT = VA.getLocVT();
4619 SDValue Arg = OutVals[OutIndex];
4620 bool isByVal = Flags.isByVal();
4621
4622 // Promote the value if needed.
4623 switch (VA.getLocInfo()) {
4624 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4624)
;
4625 case CCValAssign::Full: break;
4626 case CCValAssign::SExt:
4627 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4628 break;
4629 case CCValAssign::ZExt:
4630 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4631 break;
4632 case CCValAssign::AExt:
4633 if (Arg.getValueType().isVector() &&
4634 Arg.getValueType().getVectorElementType() == MVT::i1)
4635 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4636 else if (RegVT.is128BitVector()) {
4637 // Special case: passing MMX values in XMM registers.
4638 Arg = DAG.getBitcast(MVT::i64, Arg);
4639 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4640 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4641 } else
4642 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4643 break;
4644 case CCValAssign::BCvt:
4645 Arg = DAG.getBitcast(RegVT, Arg);
4646 break;
4647 case CCValAssign::Indirect: {
4648 if (isByVal) {
4649 // Memcpy the argument to a temporary stack slot to prevent
4650 // the caller from seeing any modifications the callee may make
4651 // as guaranteed by the `byval` attribute.
4652 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4653 Flags.getByValSize(),
4654 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4655 SDValue StackSlot =
4656 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4657 Chain =
4658 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4659 // From now on treat this as a regular pointer
4660 Arg = StackSlot;
4661 isByVal = false;
4662 } else {
4663 // Store the argument.
4664 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4665 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4666 Chain = DAG.getStore(
4667 Chain, dl, Arg, SpillSlot,
4668 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4669 Arg = SpillSlot;
4670 }
4671 break;
4672 }
4673 }
4674
4675 if (VA.needsCustom()) {
4676 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4677, __extension__
__PRETTY_FUNCTION__))
4677 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4677, __extension__
__PRETTY_FUNCTION__))
;
4678 // Split v64i1 value into two registers
4679 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4680 } else if (VA.isRegLoc()) {
4681 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4682 const TargetOptions &Options = DAG.getTarget().Options;
4683 if (Options.EmitCallSiteInfo)
4684 CSInfo.emplace_back(VA.getLocReg(), I);
4685 if (isVarArg && IsWin64) {
4686 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4687 // shadow reg if callee is a varargs function.
4688 Register ShadowReg;
4689 switch (VA.getLocReg()) {
4690 case X86::XMM0: ShadowReg = X86::RCX; break;
4691 case X86::XMM1: ShadowReg = X86::RDX; break;
4692 case X86::XMM2: ShadowReg = X86::R8; break;
4693 case X86::XMM3: ShadowReg = X86::R9; break;
4694 }
4695 if (ShadowReg)
4696 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4697 }
4698 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4699 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4699, __extension__ __PRETTY_FUNCTION__))
;
4700 if (!StackPtr.getNode())
4701 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4702 getPointerTy(DAG.getDataLayout()));
4703 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4704 dl, DAG, VA, Flags, isByVal));
4705 }
4706 }
4707
4708 if (!MemOpChains.empty())
4709 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4710
4711 if (Subtarget.isPICStyleGOT()) {
4712 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4713 // GOT pointer (except regcall).
4714 if (!isTailCall) {
4715 // Indirect call with RegCall calling convertion may use up all the
4716 // general registers, so it is not suitable to bind EBX reister for
4717 // GOT address, just let register allocator handle it.
4718 if (CallConv != CallingConv::X86_RegCall)
4719 RegsToPass.push_back(std::make_pair(
4720 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4721 getPointerTy(DAG.getDataLayout()))));
4722 } else {
4723 // If we are tail calling and generating PIC/GOT style code load the
4724 // address of the callee into ECX. The value in ecx is used as target of
4725 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4726 // for tail calls on PIC/GOT architectures. Normally we would just put the
4727 // address of GOT into ebx and then call target@PLT. But for tail calls
4728 // ebx would be restored (since ebx is callee saved) before jumping to the
4729 // target@PLT.
4730
4731 // Note: The actual moving to ECX is done further down.
4732 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4733 if (G && !G->getGlobal()->hasLocalLinkage() &&
4734 G->getGlobal()->hasDefaultVisibility())
4735 Callee = LowerGlobalAddress(Callee, DAG);
4736 else if (isa<ExternalSymbolSDNode>(Callee))
4737 Callee = LowerExternalSymbol(Callee, DAG);
4738 }
4739 }
4740
4741 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
4742 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
4743 // From AMD64 ABI document:
4744 // For calls that may call functions that use varargs or stdargs
4745 // (prototype-less calls or calls to functions containing ellipsis (...) in
4746 // the declaration) %al is used as hidden argument to specify the number
4747 // of SSE registers used. The contents of %al do not need to match exactly
4748 // the number of registers, but must be an upper bound on the number of SSE
4749 // registers used and is in the range 0 - 8 inclusive.
4750
4751 // Count the number of XMM registers allocated.
4752 static const MCPhysReg XMMArgRegs[] = {
4753 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4754 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4755 };
4756 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4757 assert((Subtarget.hasSSE1() || !NumXMMRegs)(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4758, __extension__
__PRETTY_FUNCTION__))
4758 && "SSE registers cannot be used when SSE is disabled")(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4758, __extension__
__PRETTY_FUNCTION__))
;
4759 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4760 DAG.getConstant(NumXMMRegs, dl,
4761 MVT::i8)));
4762 }
4763
4764 if (isVarArg && IsMustTail) {
4765 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4766 for (const auto &F : Forwards) {
4767 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4768 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4769 }
4770 }
4771
4772 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4773 // don't need this because the eligibility check rejects calls that require
4774 // shuffling arguments passed in memory.
4775 if (!IsSibcall && isTailCall) {
4776 // Force all the incoming stack arguments to be loaded from the stack
4777 // before any new outgoing arguments are stored to the stack, because the
4778 // outgoing stack slots may alias the incoming argument stack slots, and
4779 // the alias isn't otherwise explicit. This is slightly more conservative
4780 // than necessary, because it means that each store effectively depends
4781 // on every argument instead of just those arguments it would clobber.
4782 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4783
4784 SmallVector<SDValue, 8> MemOpChains2;
4785 SDValue FIN;
4786 int FI = 0;
4787 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4788 ++I, ++OutsIndex) {
4789 CCValAssign &VA = ArgLocs[I];
4790
4791 if (VA.isRegLoc()) {
4792 if (VA.needsCustom()) {
4793 assert((CallConv == CallingConv::X86_RegCall) &&(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4794, __extension__
__PRETTY_FUNCTION__))
4794 "Expecting custom case only in regcall calling convention")(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4794, __extension__
__PRETTY_FUNCTION__))
;
4795 // This means that we are in special case where one argument was
4796 // passed through two register locations - Skip the next location
4797 ++I;
4798 }
4799
4800 continue;
4801 }
4802
4803 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4803, __extension__ __PRETTY_FUNCTION__))
;
4804 SDValue Arg = OutVals[OutsIndex];
4805 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4806 // Skip inalloca/preallocated arguments. They don't require any work.
4807 if (Flags.isInAlloca() || Flags.isPreallocated())
4808 continue;
4809 // Create frame index.
4810 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4811 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4812 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4813 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4814
4815 if (Flags.isByVal()) {
4816 // Copy relative to framepointer.
4817 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4818 if (!StackPtr.getNode())
4819 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4820 getPointerTy(DAG.getDataLayout()));
4821 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4822 StackPtr, Source);
4823
4824 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4825 ArgChain,
4826 Flags, DAG, dl));
4827 } else {
4828 // Store relative to framepointer.
4829 MemOpChains2.push_back(DAG.getStore(
4830 ArgChain, dl, Arg, FIN,
4831 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4832 }
4833 }
4834
4835 if (!MemOpChains2.empty())
4836 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4837
4838 // Store the return address to the appropriate stack slot.
4839 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4840 getPointerTy(DAG.getDataLayout()),
4841 RegInfo->getSlotSize(), FPDiff, dl);
4842 }
4843
4844 // Build a sequence of copy-to-reg nodes chained together with token chain
4845 // and glue operands which copy the outgoing args into registers.
4846 SDValue InGlue;
4847 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4848 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4849 RegsToPass[i].second, InGlue);
4850 InGlue = Chain.getValue(1);
4851 }
4852
4853 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4854 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")(static_cast <bool> (Is64Bit && "Large code model is only legal in 64-bit mode."
) ? void (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4854, __extension__
__PRETTY_FUNCTION__))
;
4855 // In the 64-bit large code model, we have to make all calls
4856 // through a register, since the call instruction's 32-bit
4857 // pc-relative offset may not be large enough to hold the whole
4858 // address.
4859 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4860 Callee->getOpcode() == ISD::ExternalSymbol) {
4861 // Lower direct calls to global addresses and external symbols. Setting
4862 // ForCall to true here has the effect of removing WrapperRIP when possible
4863 // to allow direct calls to be selected without first materializing the
4864 // address into a register.
4865 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4866 } else if (Subtarget.isTarget64BitILP32() &&
4867 Callee.getValueType() == MVT::i32) {
4868 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4869 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4870 }
4871
4872 // Returns a chain & a glue for retval copy to use.
4873 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4874 SmallVector<SDValue, 8> Ops;
4875
4876 if (!IsSibcall && isTailCall && !IsMustTail) {
4877 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
4878 InGlue = Chain.getValue(1);
4879 }
4880
4881 Ops.push_back(Chain);
4882 Ops.push_back(Callee);
4883
4884 if (isTailCall)
4885 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4886
4887 // Add argument registers to the end of the list so that they are known live
4888 // into the call.
4889 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4890 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4891 RegsToPass[i].second.getValueType()));
4892
4893 // Add a register mask operand representing the call-preserved registers.
4894 const uint32_t *Mask = [&]() {
4895 auto AdaptedCC = CallConv;
4896 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
4897 // use X86_INTR calling convention because it has the same CSR mask
4898 // (same preserved registers).
4899 if (HasNCSR)
4900 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
4901 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
4902 // to use the CSR_NoRegs_RegMask.
4903 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
4904 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
4905 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
4906 }();
4907 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4907, __extension__
__PRETTY_FUNCTION__))
;
4908
4909 // If this is an invoke in a 32-bit function using a funclet-based
4910 // personality, assume the function clobbers all registers. If an exception
4911 // is thrown, the runtime will not restore CSRs.
4912 // FIXME: Model this more precisely so that we can register allocate across
4913 // the normal edge and spill and fill across the exceptional edge.
4914 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4915 const Function &CallerFn = MF.getFunction();
4916 EHPersonality Pers =
4917 CallerFn.hasPersonalityFn()
4918 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4919 : EHPersonality::Unknown;
4920 if (isFuncletEHPersonality(Pers))
4921 Mask = RegInfo->getNoPreservedMask();
4922 }
4923
4924 // Define a new register mask from the existing mask.
4925 uint32_t *RegMask = nullptr;
4926
4927 // In some calling conventions we need to remove the used physical registers
4928 // from the reg mask. Create a new RegMask for such calling conventions.
4929 // RegMask for calling conventions that disable only return registers (e.g.
4930 // preserve_most) will be modified later in LowerCallResult.
4931 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
4932 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
4933 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4934
4935 // Allocate a new Reg Mask and copy Mask.
4936 RegMask = MF.allocateRegMask();
4937 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4938 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4939
4940 // Make sure all sub registers of the argument registers are reset
4941 // in the RegMask.
4942 if (ShouldDisableArgRegs) {
4943 for (auto const &RegPair : RegsToPass)
4944 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4945 SubRegs.isValid(); ++SubRegs)
4946 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4947 }
4948
4949 // Create the RegMask Operand according to our updated mask.
4950 Ops.push_back(DAG.getRegisterMask(RegMask));
4951 } else {
4952 // Create the RegMask Operand according to the static mask.
4953 Ops.push_back(DAG.getRegisterMask(Mask));
4954 }
4955
4956 if (InGlue.getNode())
4957 Ops.push_back(InGlue);
4958
4959 if (isTailCall) {
4960 // We used to do:
4961 //// If this is the first return lowered for this function, add the regs
4962 //// to the liveout set for the function.
4963 // This isn't right, although it's probably harmless on x86; liveouts
4964 // should be computed from returns not tail calls. Consider a void
4965 // function making a tail call to a function returning int.
4966 MF.getFrameInfo().setHasTailCall();
4967 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4968
4969 if (IsCFICall)
4970 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
4971
4972 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4973 return Ret;
4974 }
4975
4976 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4977 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4978 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4979 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
4980 // expanded to the call, directly followed by a special marker sequence and
4981 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
4982 assert(!isTailCall &&(static_cast <bool> (!isTailCall && "tail calls cannot be marked with clang.arc.attachedcall"
) ? void (0) : __assert_fail ("!isTailCall && \"tail calls cannot be marked with clang.arc.attachedcall\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4983, __extension__
__PRETTY_FUNCTION__))
4983 "tail calls cannot be marked with clang.arc.attachedcall")(static_cast <bool> (!isTailCall && "tail calls cannot be marked with clang.arc.attachedcall"
) ? void (0) : __assert_fail ("!isTailCall && \"tail calls cannot be marked with clang.arc.attachedcall\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4983, __extension__
__PRETTY_FUNCTION__))
;
4984 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode")(static_cast <bool> (Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode"
) ? void (0) : __assert_fail ("Is64Bit && \"clang.arc.attachedcall is only supported in 64bit mode\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4984, __extension__
__PRETTY_FUNCTION__))
;
4985
4986 // Add a target global address for the retainRV/claimRV runtime function
4987 // just before the call target.
4988 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
4989 auto PtrVT = getPointerTy(DAG.getDataLayout());
4990 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
4991 Ops.insert(Ops.begin() + 1, GA);
4992 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
4993 } else {
4994 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4995 }
4996
4997 if (IsCFICall)
4998 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
4999
5000 InGlue = Chain.getValue(1);
5001 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5002 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
5003
5004 // Save heapallocsite metadata.
5005 if (CLI.CB)
5006 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
5007 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
5008
5009 // Create the CALLSEQ_END node.
5010 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
5011 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
5012 DAG.getTarget().Options.GuaranteedTailCallOpt))
5013 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
5014 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
5015 // If this call passes a struct-return pointer, the callee
5016 // pops that struct pointer.
5017 NumBytesForCalleeToPop = 4;
5018
5019 // Returns a glue for retval copy to use.
5020 if (!IsSibcall) {
5021 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
5022 InGlue, dl);
5023 InGlue = Chain.getValue(1);
5024 }
5025
5026 // Handle result values, copying them out of physregs into vregs that we
5027 // return.
5028 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
5029 InVals, RegMask);
5030}
5031
5032//===----------------------------------------------------------------------===//
5033// Fast Calling Convention (tail call) implementation
5034//===----------------------------------------------------------------------===//
5035
5036// Like the stdcall convention (callee cleans up arguments), except that ECX is
5037// reserved for storing the tail called function address. Only 2 registers are
5038// free for argument passing (inreg). Tail call optimization is performed
5039// provided:
5040// * tailcallopt is enabled
5041// * caller/callee are fastcc
5042// On X86_64 architecture with GOT-style position independent code only local
5043// (within module) calls are supported at the moment.
5044// To keep the stack aligned according to platform abi the function
5045// GetAlignedArgumentStackSize ensures that argument delta is always multiples
5046// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
5047// If the tail-called callee has more arguments than the caller, the
5048// caller needs to make sure that there is room to move the RETADDR to. This is
5049// achieved by reserving an area the size of the argument delta right after the
5050// original RETADDR, but before the saved framepointer or the spilled registers
5051// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
5052// stack layout:
5053// arg1
5054// arg2
5055// RETADDR
5056// [ new RETADDR
5057// move area ]
5058// (possible EBP)
5059// ESI
5060// EDI
5061// local1 ..
5062
5063/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
5064/// requirement.
5065unsigned
5066X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
5067 SelectionDAG &DAG) const {
5068 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
5069 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
5070 assert(StackSize % SlotSize == 0 &&(static_cast <bool> (StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize") ? void (0) : __assert_fail
("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5071, __extension__
__PRETTY_FUNCTION__))
5071 "StackSize must be a multiple of SlotSize")(static_cast <bool> (StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize") ? void (0) : __assert_fail
("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5071, __extension__
__PRETTY_FUNCTION__))
;
5072 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
5073}
5074
5075/// Return true if the given stack call argument is already available in the
5076/// same position (relatively) of the caller's incoming argument stack.
///
/// The function traces \p Arg back to a frame index and then requires that
/// frame object to be a fixed object at exactly \p Offset, of exactly the
/// expected size, and (for non-byval arguments) immutable.
///
/// \param Arg    Outgoing argument value to trace back to a frame object.
/// \param Offset Stack offset the callee expects the argument at; compared
///               against MFI.getObjectOffset().
/// \param Flags  Argument flags; the byval, zext and sext bits are consulted.
/// \param MFI    Frame info queried for the object's kind, offset, size and
///               extension flags.
/// \param MRI    Used to find the defining instruction of a virtual register.
/// \param TII    Used to recognise a load from a stack slot.
/// \param VA     Assigned location; only its location type width is used.
5077static
5078bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
5079 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
5080 const X86InstrInfo *TII, const CCValAssign &VA) {
// Size of the argument value in bytes; overwritten with the byval size below
// when the argument is passed byval.
5081 unsigned Bytes = Arg.getValueSizeInBits() / 8;
5082
5083 for (;;) {
5084 // Look through nodes that don't alter the bits of the incoming value.
5085 unsigned Op = Arg.getOpcode();
5086 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
5087 Arg = Arg.getOperand(0);
5088 continue;
5089 }
// A TRUNCATE is only peeled when its input is an AssertZext whose asserted
// type equals the truncate's own result type.
5090 if (Op == ISD::TRUNCATE) {
5091 const SDValue &TruncInput = Arg.getOperand(0);
5092 if (TruncInput.getOpcode() == ISD::AssertZext &&
5093 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
5094 Arg.getValueType()) {
5095 Arg = TruncInput.getOperand(0);
5096 continue;
5097 }
5098 }
5099 break;
5100 }
5101
// INT_MAX is a sentinel meaning "no frame index found yet"; every path that
// does not return false below assigns FI.
5102 int FI = INT_MAX2147483647;
// Case 1: the value is copied from a virtual register. Its defining
// instruction must be a stack-slot load (non-byval) or an LEA of a frame
// index (byval).
5103 if (Arg.getOpcode() == ISD::CopyFromReg) {
5104 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
5105 if (!VR.isVirtual())
5106 return false;
5107 MachineInstr *Def = MRI->getVRegDef(VR);
5108 if (!Def)
5109 return false;
5110 if (!Flags.isByVal()) {
5111 if (!TII->isLoadFromStackSlot(*Def, FI))
5112 return false;
5113 } else {
5114 unsigned Opcode = Def->getOpcode();
5115 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
5116 Opcode == X86::LEA64_32r) &&
5117 Def->getOperand(1).isFI()) {
5118 FI = Def->getOperand(1).getIndex();
5119 Bytes = Flags.getByValSize();
5120 } else
5121 return false;
5122 }
// Case 2: the value is a load whose base pointer is directly a frame index.
5123 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
5124 if (Flags.isByVal())
5125 // ByVal argument is passed in as a pointer but it's now being
5126 // dereferenced. e.g.
5127 // define @foo(%struct.X* %A) {
5128 // tail call @bar(%struct.X* byval %A)
5129 // }
5130 return false;
5131 SDValue Ptr = Ld->getBasePtr();
5132 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
5133 if (!FINode)
5134 return false;
5135 FI = FINode->getIndex();
// Case 3: a byval argument given directly as a frame index node.
5136 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
5137 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
5138 FI = FINode->getIndex();
5139 Bytes = Flags.getByValSize();
5140 } else
5141 return false;
5142
5143 assert(FI != INT_MAX)(static_cast <bool> (FI != 2147483647) ? void (0) : __assert_fail
("FI != INT_MAX", "llvm/lib/Target/X86/X86ISelLowering.cpp",
5143, __extension__ __PRETTY_FUNCTION__))
;
// The frame object must be a fixed object located exactly at Offset.
5144 if (!MFI.isFixedObjectIndex(FI))
5145 return false;
5146
5147 if (Offset != MFI.getObjectOffset(FI))
5148 return false;
5149
5150 // If this is not byval, check that the argument stack object is immutable.
5151 // inalloca and argument copy elision can create mutable argument stack
5152 // objects. Byval objects can be mutated, but a byval call intends to pass the
5153 // mutated memory.
5154 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
5155 return false;
5156
5157 if (VA.getLocVT().getFixedSizeInBits() >
5158 Arg.getValueSizeInBits().getFixedValue()) {
5159 // If the argument location is wider than the argument type, check that any
5160 // extension flags match.
5161 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
5162 Flags.isSExt() != MFI.isObjectSExt(FI)) {
5163 return false;
5164 }
5165 }
5166
// Finally the frame object must have exactly the expected size.
5167 return Bytes == MFI.getObjectSize(FI);
5168}
5169
5170/// Check whether the call is eligible for tail call optimization. Targets
5171/// that want to do tail call optimization should implement this function.
///
/// When guaranteed TCO applies (the GuaranteedTailCallOpt target option, or a
/// Tail / SwiftTail callee convention) the answer depends only on the calling
/// conventions. Otherwise a series of sibcall safety checks is run and any
/// failing check returns false.
///
/// \param Callee          Call target; only inspected to tell whether it is a
///                        global address / external symbol node.
/// \param CalleeCC        Calling convention of the callee.
/// \param IsCalleePopSRet If true, the sibcall path is rejected.
/// \param isVarArg        Whether the call is variadic.
/// \param RetTy           Callee return type; checked against the caller's
///                        return type for x86_fp80.
/// \param Outs            Outgoing argument descriptions (flags).
/// \param OutVals         Outgoing argument values, parallel to \p Outs.
/// \param Ins             Descriptions of the values returned by the call.
/// \param DAG             Selection DAG providing the machine function,
///                        target options and LLVM context.
/// \returns true if the call may be lowered as a tail call.
5172bool X86TargetLowering::IsEligibleForTailCallOptimization(
5173 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
5174 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
5175 const SmallVectorImpl<SDValue> &OutVals,
5176 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5177 if (!mayTailCallThisCC(CalleeCC))
5178 return false;
5179
5180 // If -tailcallopt is specified, make fastcc functions tail-callable.
5181 MachineFunction &MF = DAG.getMachineFunction();
5182 const Function &CallerF = MF.getFunction();
5183
5184 // If the function return type is x86_fp80 and the callee return type is not,
5185 // then the FP_EXTEND of the call result is not a nop. It's not safe to
5186 // perform a tailcall optimization here.
5187 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
5188 return false;
5189
5190 CallingConv::ID CallerCC = CallerF.getCallingConv();
5191 bool CCMatch = CallerCC == CalleeCC;
5192 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
5193 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
5194 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
5195 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
5196
5197 // Win64 functions have extra shadow space for argument homing. Don't do the
5198 // sibcall if the caller and callee have mismatched expectations for this
5199 // space.
5200 if (IsCalleeWin64 != IsCallerWin64)
5201 return false;
5202
// With guaranteed TCO the decision is final here: the convention must support
// guaranteed TCO and match the caller's. None of the sibcall checks below run.
5203 if (IsGuaranteeTCO) {
5204 if (canGuaranteeTCO(CalleeCC) && CCMatch)
5205 return true;
5206 return false;
5207 }
5208
5209 // Look for obvious safe cases to perform tail call optimization that do not
5210 // require ABI changes. This is what gcc calls sibcall.
5211
5212 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
5213 // emit a special epilogue.
5214 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5215 if (RegInfo->hasStackRealignment(MF))
5216 return false;
5217
5218 // Also avoid sibcall optimization if we're an sret return fn and the callee
5219 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
5220 // insufficient.
5221 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
5222 // For a compatible tail call the callee must return our sret pointer. So it
5223 // needs to be (a) an sret function itself and (b) we pass our sret as its
5224 // sret. Condition #b is harder to determine.
5225 return false;
5226 } else if (IsCalleePopSRet)
5227 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
5228 // expect that.
5229 return false;
5230
5231 // Do not sibcall optimize vararg calls unless all arguments are passed via
5232 // registers.
5233 LLVMContext &C = *DAG.getContext();
5234 if (isVarArg && !Outs.empty()) {
5235 // Optimizing for varargs on Win64 is unlikely to be safe without
5236 // additional testing.
5237 if (IsCalleeWin64 || IsCallerWin64)
5238 return false;
5239
5240 SmallVector<CCValAssign, 16> ArgLocs;
5241 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5242
5243 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
5244 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
5245 if (!ArgLocs[i].isRegLoc())
5246 return false;
5247 }
5248
5249 // If the call result is in ST0 / ST1, it needs to be popped off the x87
5250 // stack. Therefore, if it's not used by the call it is not safe to optimize
5251 // this into a sibcall.
5252 bool Unused = false;
5253 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5254 if (!Ins[i].Used) {
5255 Unused = true;
5256 break;
5257 }
5258 }
// Only analyze the return locations when at least one result is unused.
5259 if (Unused) {
5260 SmallVector<CCValAssign, 16> RVLocs;
5261 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
5262 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
5263 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5264 CCValAssign &VA = RVLocs[i];
5265 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
5266 return false;
5267 }
5268 }
5269
5270 // Check that the call results are passed in the same way.
5271 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5272 RetCC_X86, RetCC_X86))
5273 return false;
5274 // The callee has to preserve all registers the caller needs to preserve.
5275 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
5276 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5277 if (!CCMatch) {
5278 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5279 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5280 return false;
5281 }
5282
// Stack bytes used by the callee's arguments; stays 0 when there are no
// outgoing arguments. Compared with the caller's pop-on-return size at the end.
5283 unsigned StackArgsSize = 0;
5284
5285 // If the callee takes no arguments then go on to check the results of the
5286 // call.
5287 if (!Outs.empty()) {
5288 // Check if stack adjustment is needed. For now, do not do this if any
5289 // argument is passed on the stack.
5290 SmallVector<CCValAssign, 16> ArgLocs;
5291 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5292
5293 // Allocate shadow area for Win64
5294 if (IsCalleeWin64)
5295 CCInfo.AllocateStack(32, Align(8));
5296
5297 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
5298 StackArgsSize = CCInfo.getNextStackOffset();
5299
5300 if (CCInfo.getNextStackOffset()) {
5301 // Check if the arguments are already laid out in the right way as
5302 // the caller's fixed stack objects.
5303 MachineFrameInfo &MFI = MF.getFrameInfo();
5304 const MachineRegisterInfo *MRI = &MF.getRegInfo();
5305 const X86InstrInfo *TII = Subtarget.getInstrInfo();
5306 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5307 CCValAssign &VA = ArgLocs[i];
5308 SDValue Arg = OutVals[i];
5309 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5310 if (VA.getLocInfo() == CCValAssign::Indirect)
5311 return false;
5312 if (!VA.isRegLoc()) {
5313 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
5314 MFI, MRI, TII, VA))
5315 return false;
5316 }
5317 }
5318 }
5319
5320 bool PositionIndependent = isPositionIndependent();
5321 // If the tailcall address may be in a register, then make sure it's
5322 // possible to register allocate for it. In 32-bit, the call address can
5323 // only target EAX, EDX, or ECX since the tail call must be scheduled after
5324 // callee-saved registers are restored. These happen to be the same
5325 // registers used to pass 'inreg' arguments so watch out for those.
5326 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
5327 !isa<ExternalSymbolSDNode>(Callee)) ||
5328 PositionIndependent)) {
5329 unsigned NumInRegs = 0;
5330 // In PIC we need an extra register to formulate the address computation
5331 // for the callee.
5332 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
5333
5334 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5335 CCValAssign &VA = ArgLocs[i];
5336 if (!VA.isRegLoc())
5337 continue;
5338 Register Reg = VA.getLocReg();
5339 switch (Reg) {
5340 default: break;
5341 case X86::EAX: case X86::EDX: case X86::ECX:
// Reject as soon as MaxInRegs of EAX/EDX/ECX are taken by arguments.
5342 if (++NumInRegs == MaxInRegs)
5343 return false;
5344 break;
5345 }
5346 }
5347 }
5348
// NOTE(review): parametersInCSRMatch is defined elsewhere; presumably it
// checks arguments assigned to callee-saved registers -- confirm.
5349 const MachineRegisterInfo &MRI = MF.getRegInfo();
5350 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5351 return false;
5352 }
5353
5354 bool CalleeWillPop =
5355 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
5356 MF.getTarget().Options.GuaranteedTailCallOpt);
5357
5358 if (unsigned BytesToPop =
5359 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
5360 // If we have bytes to pop, the callee must pop them.
5361 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
5362 if (!CalleePopMatches)
5363 return false;
5364 } else if (CalleeWillPop && StackArgsSize > 0) {
5365 // If we don't have bytes to pop, make sure the callee doesn't pop any.
5366 return false;
5367 }
5368
5369 return true;
5370}
5371
5372FastISel *
5373X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
5374 const TargetLibraryInfo *libInfo) const {
5375 return X86::createFastISel(funcInfo, libInfo);
5376}
5377
5378//===----------------------------------------------------------------------===//
5379// Other Lowering Hooks
5380//===----------------------------------------------------------------------===//
5381
5382bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
5383 bool AssumeSingleUse) {
5384 if (!AssumeSingleUse && !Op.hasOneUse())
5385 return false;
5386 if (!ISD::isNormalLoad(Op.getNode()))
5387 return false;
5388
5389 // If this is an unaligned vector, make sure the target supports folding it.
5390 auto *Ld = cast<LoadSDNode>(Op.getNode());
5391 if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
5392 Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
5393 return false;
5394
5395 // TODO: If this is a non-temporal load and the target has an instruction
5396 // for it, it should not be folded. See "useNonTemporalLoad()".
5397
5398 return true;
5399}
5400
5401bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
5402 const X86Subtarget &Subtarget,
5403 bool AssumeSingleUse) {
5404 assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory")(static_cast <bool> (Subtarget.hasAVX() && "Expected AVX for broadcast from memory"
) ? void (0) : __assert_fail ("Subtarget.hasAVX() && \"Expected AVX for broadcast from memory\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5404, __extension__
__PRETTY_FUNCTION__))
;
5405 if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
5406 return false;
5407
5408 // We can not replace a wide volatile load with a broadcast-from-memory,
5409 // because that would narrow the load, which isn't legal for volatiles.
5410 auto *Ld = cast<LoadSDNode>(Op.getNode());
5411 return !Ld->isVolatile() ||
5412 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
5413}
5414
5415bool X86::mayFoldIntoStore(SDValue Op) {
5416 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
5417}
5418
5419bool X86::mayFoldIntoZeroExtend(SDValue Op) {
5420 if (Op.hasOneUse()) {
5421 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
5422 return (ISD::ZERO_EXTEND == Opcode);
5423 }
5424 return false;
5425}
5426
5427static bool isTargetShuffle(unsigned Opcode) {
5428 switch(Opcode) {
5429 default: return false;
5430 case X86ISD::BLENDI:
5431 case X86ISD::PSHUFB:
5432 case X86ISD::PSHUFD:
5433 case X86ISD::PSHUFHW:
5434 case X86ISD::PSHUFLW:
5435 case X86ISD::SHUFP:
5436 case X86ISD::INSERTPS:
5437 case X86ISD::EXTRQI:
5438 case X86ISD::INSERTQI:
5439 case X86ISD::VALIGN:
5440 case X86ISD::PALIGNR:
5441 case X86ISD::VSHLDQ:
5442 case X86ISD::VSRLDQ:
5443 case X86ISD::MOVLHPS:
5444 case X86ISD::MOVHLPS:
5445 case X86ISD::MOVSHDUP:
5446 case X86ISD::MOVSLDUP:
5447 case X86ISD::MOVDDUP:
5448 case X86ISD::MOVSS:
5449 case X86ISD::MOVSD:
5450 case X86ISD::MOVSH:
5451 case X86ISD::UNPCKL:
5452 case X86ISD::UNPCKH:
5453 case X86ISD::VBROADCAST:
5454 case X86ISD::VPERMILPI:
5455 case X86ISD::VPERMILPV:
5456 case X86ISD::VPERM2X128:
5457 case X86ISD::SHUF128:
5458 case X86ISD::VPERMIL2:
5459 case X86ISD::VPERMI:
5460 case X86ISD::VPPERM:
5461 case X86ISD::VPERMV:
5462 case X86ISD::VPERMV3:
5463 case X86ISD::VZEXT_MOVL:
5464 return true;
5465 }
5466}
5467
5468static bool isTargetShuffleVariableMask(unsigned Opcode) {
5469 switch (Opcode) {
5470 default: return false;
5471 // Target Shuffles.
5472 case X86ISD::PSHUFB:
5473 case X86ISD::VPERMILPV:
5474 case X86ISD::VPERMIL2:
5475 case X86ISD::VPPERM:
5476 case X86ISD::VPERMV:
5477 case X86ISD::VPERMV3:
5478 return true;
5479 // 'Faux' Target Shuffles.
5480 case ISD::OR:
5481 case ISD::AND:
5482 case X86ISD::ANDNP:
5483 return true;
5484 }
5485}
5486
5487SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
5488 MachineFunction &MF = DAG.getMachineFunction();
5489 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5490 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
5491 int ReturnAddrIndex = FuncInfo->getRAIndex();
5492
5493 if (ReturnAddrIndex == 0) {
5494 // Set up a frame object for the return address.
5495 unsigned SlotSize = RegInfo->getSlotSize();
5496 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
5497 -(int64_t)SlotSize,
5498 false);
5499 FuncInfo->setRAIndex(ReturnAddrIndex);
5500 }
5501
5502 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
5503}
5504
5505bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
5506 bool hasSymbolicDisplacement) {
5507 // Offset should fit into 32 bit immediate field.
5508 if (!isInt<32>(Offset))
5509 return false;
5510
5511 // If we don't have a symbolic displacement - we don't have any extra
5512 // restrictions.
5513 if (!hasSymbolicDisplacement)
5514 return true;
5515
5516 // FIXME: Some tweaks might be needed for medium code model.
5517 if (M != CodeModel::Small && M != CodeModel::Kernel)
5518 return false;
5519
5520 // For small code model we assume that latest object is 16MB before end of 31
5521 // bits boundary. We may also accept pretty large negative constants knowing
5522 // that all objects are in the positive half of address space.
5523 if (M == CodeModel::Small && Offset < 16*1024*1024)
5524 return true;
5525
5526 // For kernel code model we know that all object resist in the negative half
5527 // of 32bits address space. We may not accept negative offsets, since they may
5528 // be just off and we may accept pretty large positive ones.
5529 if (M == CodeModel::Kernel && Offset >= 0)
5530 return true;
5531
5532 return false;
5533}
5534
5535/// Determines whether the callee is required to pop its own arguments.
5536/// Callee pop is necessary to support tail calls.
5537bool X86::isCalleePop(CallingConv::ID CallingConv,
5538 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
5539 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
5540 // can guarantee TCO.
5541 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
5542 return true;
5543
5544 switch (CallingConv) {
5545 default:
5546 return false;
5547 case CallingConv::X86_StdCall:
5548 case CallingConv::X86_FastCall:
5549 case CallingConv::X86_ThisCall:
5550 case CallingConv::X86_VectorCall:
5551 return !is64Bit;
5552 }
5553}
5554
5555/// Return true if the condition is an signed comparison operation.
5556static bool isX86CCSigned(unsigned X86CC) {
5557 switch (X86CC) {
5558 default:
5559 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5559)
;
5560 case X86::COND_E:
5561 case X86::COND_NE:
5562 case X86::COND_B:
5563 case X86::COND_A:
5564 case X86::COND_BE:
5565 case X86::COND_AE:
5566 return false;
5567 case X86::COND_G:
5568 case X86::COND_GE:
5569 case X86::COND_L:
5570 case X86::COND_LE:
5571 return true;
5572 }
5573}
5574
5575static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
5576 switch (SetCCOpcode) {
5577 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5577)
;
5578 case ISD::SETEQ: return X86::COND_E;
5579 case ISD::SETGT: return X86::COND_G;
5580 case ISD::SETGE: return X86::COND_GE;
5581 case ISD::SETLT: return X86::COND_L;
5582 case ISD::SETLE: return X86::COND_LE;
5583 case ISD::SETNE: return X86::COND_NE;
5584 case ISD::SETULT: return X86::COND_B;
5585 case ISD::SETUGT: return X86::COND_A;
5586 case ISD::SETULE: return X86::COND_BE;
5587 case ISD::SETUGE: return X86::COND_AE;
5588 }
5589}
5590
5591/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
5592/// condition code, returning the condition code and the LHS/RHS of the
5593/// comparison to make.
5594static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
5595 bool isFP, SDValue &LHS, SDValue &RHS,
5596 SelectionDAG &DAG) {
5597 if (!isFP) {
5598 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5599 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
5600 // X > -1 -> X == 0, jump !sign.
5601 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5602 return X86::COND_NS;
5603 }
5604 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
5605 // X < 0 -> X == 0, jump on sign.
5606 return X86::COND_S;
5607 }
5608 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
5609 // X >= 0 -> X == 0, jump on !sign.
5610 return X86::COND_NS;
5611 }
5612 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5613 // X < 1 -> X <= 0
5614 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5615 return X86::COND_LE;
5616 }
5617 }
5618
5619 return TranslateIntegerX86CC(SetCCOpcode);
5620 }
5621
5622 // First determine if it is required or is profitable to flip the operands.
5623
5624 // If LHS is a foldable load, but RHS is not, flip the condition.
5625 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5626 !ISD::isNON_EXTLoad(RHS.getNode())) {
5627 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5628 std::swap(LHS, RHS);
5629 }
5630
5631 switch (SetCCOpcode) {
5632 default: break;
5633 case ISD::SETOLT:
5634 case ISD::SETOLE:
5635 case ISD::SETUGT:
5636 case ISD::SETUGE:
5637 std::swap(LHS, RHS);
5638 break;
5639 }
5640
5641 // On a floating point condition, the flags are set as follows:
5642 // ZF PF CF op
5643 // 0 | 0 | 0 | X > Y
5644 // 0 | 0 | 1 | X < Y
5645 // 1 | 0 | 0 | X == Y
5646 // 1 | 1 | 1 | unordered
5647 switch (SetCCOpcode) {
5648 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5648)
;
5649 case ISD::SETUEQ:
5650 case ISD::SETEQ: return X86::COND_E;
5651 case ISD::SETOLT: // flipped
5652 case ISD::SETOGT:
5653 case ISD::SETGT: return X86::COND_A;
5654 case ISD::SETOLE: // flipped
5655 case ISD::SETOGE:
5656 case ISD::SETGE: return X86::COND_AE;
5657 case ISD::SETUGT: // flipped
5658 case ISD::SETULT:
5659 case ISD::SETLT: return X86::COND_B;
5660 case ISD::SETUGE: // flipped
5661 case ISD::SETULE:
5662 case ISD::SETLE: return X86::COND_BE;
5663 case ISD::SETONE:
5664 case ISD::SETNE: return X86::COND_NE;
5665 case ISD::SETUO: return X86::COND_P;
5666 case ISD::SETO: return X86::COND_NP;
5667 case ISD::SETOEQ:
5668 case ISD::SETUNE: return X86::COND_INVALID;
5669 }
5670}
5671
5672/// Is there a floating point cmov for the specific X86 condition code?
5673/// Current x86 isa includes the following FP cmov instructions:
5674/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5675static bool hasFPCMov(unsigned X86CC) {
5676 switch (X86CC) {
5677 default:
5678 return false;
5679 case X86::COND_B:
5680 case X86::COND_BE:
5681 case X86::COND_E:
5682 case X86::COND_P:
5683 case X86::COND_A:
5684 case X86::COND_AE:
5685 case X86::COND_NE:
5686 case X86::COND_NP:
5687 return true;
5688 }
5689}
5690
5691static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
5692 return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
5693 VT.is512BitVector();
5694}
5695
5696bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5697 const CallInst &I,
5698 MachineFunction &MF,
5699 unsigned Intrinsic) const {
5700 Info.flags = MachineMemOperand::MONone;
5701 Info.offset = 0;
5702
5703 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5704 if (!IntrData) {
5705 switch (Intrinsic) {
5706 case Intrinsic::x86_aesenc128kl:
5707 case Intrinsic::x86_aesdec128kl:
5708 Info.opc = ISD::INTRINSIC_W_CHAIN;
5709 Info.ptrVal = I.getArgOperand(1);
5710 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5711 Info.align = Align(1);
5712 Info.flags |= MachineMemOperand::MOLoad;
5713 return true;
5714 case Intrinsic::x86_aesenc256kl:
5715 case Intrinsic::x86_aesdec256kl:
5716 Info.opc = ISD::INTRINSIC_W_CHAIN;
5717 Info.ptrVal = I.getArgOperand(1);
5718 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5719 Info.align = Align(1);
5720 Info.flags |= MachineMemOperand::MOLoad;
5721 return true;
5722 case Intrinsic::x86_aesencwide128kl:
5723 case Intrinsic::x86_aesdecwide128kl:
5724 Info.opc = ISD::INTRINSIC_W_CHAIN;
5725 Info.ptrVal = I.getArgOperand(0);
5726 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5727 Info.align = Align(1);
5728 Info.flags |= MachineMemOperand::MOLoad;
5729 return true;
5730 case Intrinsic::x86_aesencwide256kl:
5731 case Intrinsic::x86_aesdecwide256kl:
5732 Info.opc = ISD::INTRINSIC_W_CHAIN;
5733 Info.ptrVal = I.getArgOperand(0);
5734 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5735 Info.align = Align(1);
5736 Info.flags |= MachineMemOperand::MOLoad;
5737 return true;
5738 case Intrinsic::x86_cmpccxadd32:
5739 case Intrinsic::x86_cmpccxadd64:
5740 case Intrinsic::x86_atomic_bts:
5741 case Intrinsic::x86_atomic_btc:
5742 case Intrinsic::x86_atomic_btr: {
5743 Info.opc = ISD::INTRINSIC_W_CHAIN;
5744 Info.ptrVal = I.getArgOperand(0);
5745 unsigned Size = I.getType()->getScalarSizeInBits();
5746 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5747 Info.align = Align(Size);
5748 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5749 MachineMemOperand::MOVolatile;
5750 return true;
5751 }
5752 case Intrinsic::x86_atomic_bts_rm:
5753 case Intrinsic::x86_atomic_btc_rm:
5754 case Intrinsic::x86_atomic_btr_rm: {
5755 Info.opc = ISD::INTRINSIC_W_CHAIN;
5756 Info.ptrVal = I.getArgOperand(0);
5757 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
5758 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5759 Info.align = Align(Size);
5760 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5761 MachineMemOperand::MOVolatile;
5762 return true;
5763 }
5764 case Intrinsic::x86_aadd32:
5765 case Intrinsic::x86_aadd64:
5766 case Intrinsic::x86_aand32:
5767 case Intrinsic::x86_aand64:
5768 case Intrinsic::x86_aor32:
5769 case Intrinsic::x86_aor64:
5770 case Intrinsic::x86_axor32:
5771 case Intrinsic::x86_axor64:
5772 case Intrinsic::x86_atomic_add_cc:
5773 case Intrinsic::x86_atomic_sub_cc:
5774 case Intrinsic::x86_atomic_or_cc:
5775 case Intrinsic::x86_atomic_and_cc:
5776 case Intrinsic::x86_atomic_xor_cc: {
5777 Info.opc = ISD::INTRINSIC_W_CHAIN;
5778 Info.ptrVal = I.getArgOperand(0);
5779 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
5780 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5781 Info.align = Align(Size);
5782 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5783 MachineMemOperand::MOVolatile;
5784 return true;
5785 }
5786 }
5787 return false;
5788 }
5789
5790 switch (IntrData->Type) {
5791 case TRUNCATE_TO_MEM_VI8:
5792 case TRUNCATE_TO_MEM_VI16:
5793 case TRUNCATE_TO_MEM_VI32: {
5794 Info.opc = ISD::INTRINSIC_VOID;
5795 Info.ptrVal = I.getArgOperand(0);
5796 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5797 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5798 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5799 ScalarVT = MVT::i8;
5800 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5801 ScalarVT = MVT::i16;
5802 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5803 ScalarVT = MVT::i32;
5804
5805 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5806 Info.align = Align(1);
5807 Info.flags |= MachineMemOperand::MOStore;
5808 break;
5809 }
5810 case GATHER:
5811 case GATHER_AVX2: {
5812 Info.opc = ISD::INTRINSIC_W_CHAIN;
5813 Info.ptrVal = nullptr;
5814 MVT DataVT = MVT::getVT(I.getType());
5815 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5816 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5817 IndexVT.getVectorNumElements());
5818 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5819 Info.align = Align(1);
5820 Info.flags |= MachineMemOperand::MOLoad;
5821 break;
5822 }
5823 case SCATTER: {
5824 Info.opc = ISD::INTRINSIC_VOID;
5825 Info.ptrVal = nullptr;
5826 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5827 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5828 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5829 IndexVT.getVectorNumElements());
5830 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5831 Info.align = Align(1);
5832 Info.flags |= MachineMemOperand::MOStore;
5833 break;
5834 }
5835 default:
5836 return false;
5837 }
5838
5839 return true;
5840}
5841
5842/// Returns true if the target can instruction select the
5843/// specified FP immediate natively. If false, the legalizer will
5844/// materialize the FP immediate as a load from a constant pool.
5845bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5846 bool ForCodeSize) const {
5847 for (const APFloat &FPImm : LegalFPImmediates)
5848 if (Imm.bitwiseIsEqual(FPImm))
5849 return true;
5850 return false;
5851}
5852
5853bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5854 ISD::LoadExtType ExtTy,
5855 EVT NewVT) const {
5856 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")(static_cast <bool> (cast<LoadSDNode>(Load)->isSimple
() && "illegal to narrow") ? void (0) : __assert_fail
("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5856, __extension__
__PRETTY_FUNCTION__))
;
5857
5858 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5859 // relocation target a movq or addq instruction: don't let the load shrink.
5860 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5861 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5862 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5863 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5864
5865 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5866 // those uses are extracted directly into a store, then the extract + store
5867 // can be store-folded. Therefore, it's probably not worth splitting the load.
5868 EVT VT = Load->getValueType(0);
5869 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5870 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5871 // Skip uses of the chain value. Result 0 of the node is the load value.
5872 if (UI.getUse().getResNo() != 0)
5873 continue;
5874
5875 // If this use is not an extract + store, it's probably worth splitting.
5876 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5877 UI->use_begin()->getOpcode() != ISD::STORE)
5878 return true;
5879 }
5880 // All non-chain uses are extract + store.
5881 return false;
5882 }
5883
5884 return true;
5885}
5886
5887/// Returns true if it is beneficial to convert a load of a constant
5888/// to just the constant itself.
5889bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5890 Type *Ty) const {
5891 assert(Ty->isIntegerTy())(static_cast <bool> (Ty->isIntegerTy()) ? void (0) :
__assert_fail ("Ty->isIntegerTy()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5891, __extension__ __PRETTY_FUNCTION__))
;
5892
5893 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5894 if (BitSize == 0 || BitSize > 64)
5895 return false;
5896 return true;
5897}
5898
5899bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5900 // If we are using XMM registers in the ABI and the condition of the select is
5901 // a floating-point compare and we have blendv or conditional move, then it is
5902 // cheaper to select instead of doing a cross-register move and creating a
5903 // load that depends on the compare result.
5904 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5905 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5906}
5907
5908bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5909 // TODO: It might be a win to ease or lift this restriction, but the generic
5910 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5911 if (VT.isVector() && Subtarget.hasAVX512())
5912 return false;
5913
5914 return true;
5915}
5916
5917bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5918 SDValue C) const {
5919 // TODO: We handle scalars using custom code, but generic combining could make
5920 // that unnecessary.
5921 APInt MulC;
5922 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5923 return false;
5924
5925 // Find the type this will be legalized too. Otherwise we might prematurely
5926 // convert this to shl+add/sub and then still have to type legalize those ops.
5927 // Another choice would be to defer the decision for illegal types until
5928 // after type legalization. But constant splat vectors of i64 can't make it
5929 // through type legalization on 32-bit targets so we would need to special
5930 // case vXi64.
5931 while (getTypeAction(Context, VT) != TypeLegal)
5932 VT = getTypeToTransformTo(Context, VT);
5933
5934 // If vector multiply is legal, assume that's faster than shl + add/sub.
5935 // Multiply is a complex op with higher latency and lower throughput in
5936 // most implementations, sub-vXi32 vector multiplies are always fast,
5937 // vXi32 mustn't have a SlowMULLD implementation, and anything larger (vXi64)