Bug Summary

File: build/source/llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 17442, column 31
Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm -resource-dir /usr/lib/llvm-16/lib/clang/16 -I lib/Target/X86 -I /build/source/llvm/lib/Target/X86 -I include -I /build/source/llvm/include -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm=build-llvm 
-fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm=build-llvm -fcoverage-prefix-map=/build/source/= -source-date-epoch 1674602410 -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-01-25-024556-16494-1 -x c++ /build/source/llvm/lib/Target/X86/X86ISelLowering.cpp
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/IntrinsicLowering.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineJumpTableInfo.h"
39#include "llvm/CodeGen/MachineLoopInfo.h"
40#include "llvm/CodeGen/MachineModuleInfo.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/TargetLowering.h"
43#include "llvm/CodeGen/WinEHFuncInfo.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DerivedTypes.h"
47#include "llvm/IR/DiagnosticInfo.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalVariable.h"
51#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Instructions.h"
53#include "llvm/IR/Intrinsics.h"
54#include "llvm/IR/PatternMatch.h"
55#include "llvm/MC/MCAsmInfo.h"
56#include "llvm/MC/MCContext.h"
57#include "llvm/MC/MCExpr.h"
58#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/KnownBits.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Target/TargetOptions.h"
65#include <algorithm>
66#include <bitset>
67#include <cctype>
68#include <numeric>
69using namespace llvm;
70
// Debug category name for this file; the NumTailCalls statistic declared in
// this file is registered under the same "x86-isel" string.
#define DEBUG_TYPE "x86-isel"
72
73STATISTIC(NumTailCalls, "Number of tail calls")static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls"
, "Number of tail calls"}
;
74
75static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
76 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
77 cl::desc(
78 "Sets the preferable loop alignment for experiments (as log2 bytes) "
79 "for innermost loops only. If specified, this option overrides "
80 "alignment set by x86-experimental-pref-loop-alignment."),
81 cl::Hidden);
82
83static cl::opt<bool> MulConstantOptimization(
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden);
88
89static cl::opt<bool> ExperimentalUnorderedISEL(
90 "x86-experimental-unordered-atomic-isel", cl::init(false),
91 cl::desc("Use LoadSDNode and StoreSDNode instead of "
92 "AtomicSDNode for unordered atomic loads and "
93 "stores respectively."),
94 cl::Hidden);
95
96/// Call this when the user attempts to do something unsupported, like
97/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
98/// report_fatal_error, so calling code should attempt to recover without
99/// crashing.
100static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
101 const char *Msg) {
102 MachineFunction &MF = DAG.getMachineFunction();
103 DAG.getContext()->diagnose(
104 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
105}
106
107/// Returns true if a CC can dynamically exclude a register from the list of
108/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
109/// params/returns.
110static bool shouldDisableCalleeSavedRegisterCC(CallingConv::ID CC) {
111 switch (CC) {
112 default:
113 return false;
114 case CallingConv::X86_RegCall:
115 case CallingConv::PreserveMost:
116 case CallingConv::PreserveAll:
117 return true;
118 }
119}
120
121X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
122 const X86Subtarget &STI)
123 : TargetLowering(TM), Subtarget(STI) {
124 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
125 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
126
127 // Set up the TargetLowering object.
128
129 // X86 is weird. It always uses i8 for shift amounts and setcc results.
130 setBooleanContents(ZeroOrOneBooleanContent);
131 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
132 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
133
134 // For 64-bit, since we have so many registers, use the ILP scheduler.
135 // For 32-bit, use the register pressure specific scheduling.
136 // For Atom, always use ILP scheduling.
137 if (Subtarget.isAtom())
138 setSchedulingPreference(Sched::ILP);
139 else if (Subtarget.is64Bit())
140 setSchedulingPreference(Sched::ILP);
141 else
142 setSchedulingPreference(Sched::RegPressure);
143 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
144 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
145
146 // Bypass expensive divides and use cheaper ones.
147 if (TM.getOptLevel() >= CodeGenOpt::Default) {
148 if (Subtarget.hasSlowDivide32())
149 addBypassSlowDiv(32, 8);
150 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
151 addBypassSlowDiv(64, 32);
152 }
153
154 // Setup Windows compiler runtime calls.
155 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
156 static const struct {
157 const RTLIB::Libcall Op;
158 const char * const Name;
159 const CallingConv::ID CC;
160 } LibraryCalls[] = {
161 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
162 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
163 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
164 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
165 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
166 };
167
168 for (const auto &LC : LibraryCalls) {
169 setLibcallName(LC.Op, LC.Name);
170 setLibcallCallingConv(LC.Op, LC.CC);
171 }
172 }
173
174 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
175 // MSVCRT doesn't have powi; fall back to pow
176 setLibcallName(RTLIB::POWI_F32, nullptr);
177 setLibcallName(RTLIB::POWI_F64, nullptr);
178 }
179
180 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
181 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
182 // FIXME: Should we be limiting the atomic size on other configs? Default is
183 // 1024.
184 if (!Subtarget.canUseCMPXCHG8B())
185 setMaxAtomicSizeInBitsSupported(32);
186
187 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
188
189 setMaxLargeFPConvertBitWidthSupported(128);
190
191 // Set up the register classes.
192 addRegisterClass(MVT::i8, &X86::GR8RegClass);
193 addRegisterClass(MVT::i16, &X86::GR16RegClass);
194 addRegisterClass(MVT::i32, &X86::GR32RegClass);
195 if (Subtarget.is64Bit())
196 addRegisterClass(MVT::i64, &X86::GR64RegClass);
197
198 for (MVT VT : MVT::integer_valuetypes())
199 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
200
201 // We don't accept any truncstore of integer registers.
202 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
203 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
204 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
205 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
206 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
207 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
208
209 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
210
211 // SETOEQ and SETUNE require checking two conditions.
212 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
213 setCondCodeAction(ISD::SETOEQ, VT, Expand);
214 setCondCodeAction(ISD::SETUNE, VT, Expand);
215 }
216
217 // Integer absolute.
218 if (Subtarget.canUseCMOV()) {
219 setOperationAction(ISD::ABS , MVT::i16 , Custom);
220 setOperationAction(ISD::ABS , MVT::i32 , Custom);
221 if (Subtarget.is64Bit())
222 setOperationAction(ISD::ABS , MVT::i64 , Custom);
223 }
224
225 // Signed saturation subtraction.
226 setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
227 setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
228 setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
229 if (Subtarget.is64Bit())
230 setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
231
232 // Funnel shifts.
233 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
234 // For slow shld targets we only lower for code size.
235 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
236
237 setOperationAction(ShiftOp , MVT::i8 , Custom);
238 setOperationAction(ShiftOp , MVT::i16 , Custom);
239 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
240 if (Subtarget.is64Bit())
241 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
242 }
243
244 if (!Subtarget.useSoftFloat()) {
245 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
246 // operation.
247 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
248 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
249 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
250 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
251 // We have an algorithm for SSE2, and we turn this into a 64-bit
252 // FILD or VCVTUSI2SS/SD for other targets.
253 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
254 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
255 // We have an algorithm for SSE2->double, and we turn this into a
256 // 64-bit FILD followed by conditional FADD for other targets.
257 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
258 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
259
260 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
261 // this operation.
262 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
263 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
264 // SSE has no i16 to fp conversion, only i32. We promote in the handler
265 // to allow f80 to use i16 and f64 to use i16 with sse1 only
266 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
267 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
268 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
269 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
270 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
271 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
272 // are Legal, f80 is custom lowered.
273 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
274 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
275
276 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
277 // this operation.
278 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
279 // FIXME: This doesn't generate invalid exception when it should. PR44019.
280 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
281 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
282 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
283 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
284 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
285 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
286 // are Legal, f80 is custom lowered.
287 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
288 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
289
290 // Handle FP_TO_UINT by promoting the destination to a larger signed
291 // conversion.
292 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
293 // FIXME: This doesn't generate invalid exception when it should. PR44019.
294 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
295 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
296 // FIXME: This doesn't generate invalid exception when it should. PR44019.
297 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
298 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
299 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
300 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
301 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
302
303 setOperationAction(ISD::LRINT, MVT::f32, Custom);
304 setOperationAction(ISD::LRINT, MVT::f64, Custom);
305 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
306 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
307
308 if (!Subtarget.is64Bit()) {
309 setOperationAction(ISD::LRINT, MVT::i64, Custom);
310 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
311 }
312 }
313
314 if (Subtarget.hasSSE2()) {
315 // Custom lowering for saturating float to int conversions.
316 // We handle promotion to larger result types manually.
317 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
318 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
319 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
320 }
321 if (Subtarget.is64Bit()) {
322 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
323 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
324 }
325 }
326
327 // Handle address space casts between mixed sized pointers.
328 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
329 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
330
331 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
332 if (!Subtarget.hasSSE2()) {
333 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
334 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
335 if (Subtarget.is64Bit()) {
336 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
337 // Without SSE, i64->f64 goes through memory.
338 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
339 }
340 } else if (!Subtarget.is64Bit())
341 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
342
343 // Scalar integer divide and remainder are lowered to use operations that
344 // produce two results, to match the available instructions. This exposes
345 // the two-result form to trivial CSE, which is able to combine x/y and x%y
346 // into a single instruction.
347 //
348 // Scalar integer multiply-high is also lowered to use two-result
349 // operations, to match the available instructions. However, plain multiply
350 // (low) operations are left as Legal, as there are single-result
351 // instructions for this in x86. Using the two-result multiply instructions
352 // when both high and low results are needed must be arranged by dagcombine.
353 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
354 setOperationAction(ISD::MULHS, VT, Expand);
355 setOperationAction(ISD::MULHU, VT, Expand);
356 setOperationAction(ISD::SDIV, VT, Expand);
357 setOperationAction(ISD::UDIV, VT, Expand);
358 setOperationAction(ISD::SREM, VT, Expand);
359 setOperationAction(ISD::UREM, VT, Expand);
360 }
361
362 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
363 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
364 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
365 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
366 setOperationAction(ISD::BR_CC, VT, Expand);
367 setOperationAction(ISD::SELECT_CC, VT, Expand);
368 }
369 if (Subtarget.is64Bit())
370 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
371 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
372 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
373 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
374
375 setOperationAction(ISD::FREM , MVT::f32 , Expand);
376 setOperationAction(ISD::FREM , MVT::f64 , Expand);
377 setOperationAction(ISD::FREM , MVT::f80 , Expand);
378 setOperationAction(ISD::FREM , MVT::f128 , Expand);
379
380 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
381 setOperationAction(ISD::GET_ROUNDING , MVT::i32 , Custom);
382 setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
383 }
384
385 // Promote the i8 variants and force them on up to i32 which has a shorter
386 // encoding.
387 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
388 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
389 // Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
390 // a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
391 // promote that too.
392 setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
393 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32);
394
395 if (!Subtarget.hasBMI()) {
396 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
397 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
398 if (Subtarget.is64Bit()) {
399 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
400 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
401 }
402 }
403
404 if (Subtarget.hasLZCNT()) {
405 // When promoting the i8 variants, force them to i32 for a shorter
406 // encoding.
407 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
408 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
409 } else {
410 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
411 if (VT == MVT::i64 && !Subtarget.is64Bit())
412 continue;
413 setOperationAction(ISD::CTLZ , VT, Custom);
414 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
415 }
416 }
417
418 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
419 ISD::STRICT_FP_TO_FP16}) {
420 // Special handling for half-precision floating point conversions.
421 // If we don't have F16C support, then lower half float conversions
422 // into library calls.
423 setOperationAction(
424 Op, MVT::f32,
425 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
426 // There's never any support for operations beyond MVT::f32.
427 setOperationAction(Op, MVT::f64, Expand);
428 setOperationAction(Op, MVT::f80, Expand);
429 setOperationAction(Op, MVT::f128, Expand);
430 }
431
432 for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
433 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
434 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
435 setTruncStoreAction(VT, MVT::f16, Expand);
436 setTruncStoreAction(VT, MVT::bf16, Expand);
437
438 setOperationAction(ISD::BF16_TO_FP, VT, Expand);
439 setOperationAction(ISD::FP_TO_BF16, VT, Custom);
440 }
441
442 setOperationAction(ISD::PARITY, MVT::i8, Custom);
443 setOperationAction(ISD::PARITY, MVT::i16, Custom);
444 setOperationAction(ISD::PARITY, MVT::i32, Custom);
445 if (Subtarget.is64Bit())
446 setOperationAction(ISD::PARITY, MVT::i64, Custom);
447 if (Subtarget.hasPOPCNT()) {
448 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
449 // popcntw is longer to encode than popcntl and also has a false dependency
450 // on the dest that popcntl hasn't had since Cannon Lake.
451 setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
452 } else {
453 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
454 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
455 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
456 if (Subtarget.is64Bit())
457 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
458 else
459 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
460 }
461
462 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
463
464 if (!Subtarget.hasMOVBE())
465 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
466
467 // X86 wants to expand cmov itself.
468 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
469 setOperationAction(ISD::SELECT, VT, Custom);
470 setOperationAction(ISD::SETCC, VT, Custom);
471 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
472 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
473 }
474 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
475 if (VT == MVT::i64 && !Subtarget.is64Bit())
476 continue;
477 setOperationAction(ISD::SELECT, VT, Custom);
478 setOperationAction(ISD::SETCC, VT, Custom);
479 }
480
481 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
482 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
483 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
484
485 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
486 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
487 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
488 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
489 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
490 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
491 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
492 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
493
494 // Darwin ABI issue.
495 for (auto VT : { MVT::i32, MVT::i64 }) {
496 if (VT == MVT::i64 && !Subtarget.is64Bit())
497 continue;
498 setOperationAction(ISD::ConstantPool , VT, Custom);
499 setOperationAction(ISD::JumpTable , VT, Custom);
500 setOperationAction(ISD::GlobalAddress , VT, Custom);
501 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
502 setOperationAction(ISD::ExternalSymbol , VT, Custom);
503 setOperationAction(ISD::BlockAddress , VT, Custom);
504 }
505
506 // 64-bit shl, sra, srl (iff 32-bit x86)
507 for (auto VT : { MVT::i32, MVT::i64 }) {
508 if (VT == MVT::i64 && !Subtarget.is64Bit())
509 continue;
510 setOperationAction(ISD::SHL_PARTS, VT, Custom);
511 setOperationAction(ISD::SRA_PARTS, VT, Custom);
512 setOperationAction(ISD::SRL_PARTS, VT, Custom);
513 }
514
515 if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
516 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
517
518 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
519
520 // Expand certain atomics
521 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
522 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
523 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
524 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
525 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
526 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
527 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
528 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
529 }
530
531 if (!Subtarget.is64Bit())
532 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
533
534 if (Subtarget.canUseCMPXCHG16B())
535 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
536
537 // FIXME - use subtarget debug flags
538 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
539 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
540 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
541 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
542 }
543
544 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
545 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
546
547 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
548 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
549
550 setOperationAction(ISD::TRAP, MVT::Other, Legal);
551 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
552 if (Subtarget.isTargetPS())
553 setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
554 else
555 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
556
557 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
558 setOperationAction(ISD::VASTART , MVT::Other, Custom);
559 setOperationAction(ISD::VAEND , MVT::Other, Expand);
560 bool Is64Bit = Subtarget.is64Bit();
561 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
562 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
563
564 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
565 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
566
567 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
568
569 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
570 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
571 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
572
573 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
574
575 auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
576 setOperationAction(ISD::FABS, VT, Action);
577 setOperationAction(ISD::FNEG, VT, Action);
578 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
579 setOperationAction(ISD::FREM, VT, Action);
580 setOperationAction(ISD::FMA, VT, Action);
581 setOperationAction(ISD::FMINNUM, VT, Action);
582 setOperationAction(ISD::FMAXNUM, VT, Action);
583 setOperationAction(ISD::FMINIMUM, VT, Action);
584 setOperationAction(ISD::FMAXIMUM, VT, Action);
585 setOperationAction(ISD::FSIN, VT, Action);
586 setOperationAction(ISD::FCOS, VT, Action);
587 setOperationAction(ISD::FSINCOS, VT, Action);
588 setOperationAction(ISD::FSQRT, VT, Action);
589 setOperationAction(ISD::FPOW, VT, Action);
590 setOperationAction(ISD::FLOG, VT, Action);
591 setOperationAction(ISD::FLOG2, VT, Action);
592 setOperationAction(ISD::FLOG10, VT, Action);
593 setOperationAction(ISD::FEXP, VT, Action);
594 setOperationAction(ISD::FEXP2, VT, Action);
595 setOperationAction(ISD::FCEIL, VT, Action);
596 setOperationAction(ISD::FFLOOR, VT, Action);
597 setOperationAction(ISD::FNEARBYINT, VT, Action);
598 setOperationAction(ISD::FRINT, VT, Action);
599 setOperationAction(ISD::BR_CC, VT, Action);
600 setOperationAction(ISD::SETCC, VT, Action);
601 setOperationAction(ISD::SELECT, VT, Custom);
602 setOperationAction(ISD::SELECT_CC, VT, Action);
603 setOperationAction(ISD::FROUND, VT, Action);
604 setOperationAction(ISD::FROUNDEVEN, VT, Action);
605 setOperationAction(ISD::FTRUNC, VT, Action);
606 };
607
608 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
609 // f16, f32 and f64 use SSE.
610 // Set up the FP register classes.
611 addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
612 : &X86::FR16RegClass);
613 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
614 : &X86::FR32RegClass);
615 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
616 : &X86::FR64RegClass);
617
618 // Disable f32->f64 extload as we can only generate this in one instruction
619 // under optsize. So it's easier to pattern match (fpext (load)) for that
620 // case instead of needing to emit 2 instructions for extload in the
621 // non-optsize case.
622 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
623
624 for (auto VT : { MVT::f32, MVT::f64 }) {
625 // Use ANDPD to simulate FABS.
626 setOperationAction(ISD::FABS, VT, Custom);
627
628 // Use XORP to simulate FNEG.
629 setOperationAction(ISD::FNEG, VT, Custom);
630
631 // Use ANDPD and ORPD to simulate FCOPYSIGN.
632 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
633
634 // These might be better off as horizontal vector ops.
635 setOperationAction(ISD::FADD, VT, Custom);
636 setOperationAction(ISD::FSUB, VT, Custom);
637
638 // We don't support sin/cos/fmod
639 setOperationAction(ISD::FSIN , VT, Expand);
640 setOperationAction(ISD::FCOS , VT, Expand);
641 setOperationAction(ISD::FSINCOS, VT, Expand);
642 }
643
644 // Half type will be promoted by default.
645 setF16Action(MVT::f16, Promote);
646 setOperationAction(ISD::FADD, MVT::f16, Promote);
647 setOperationAction(ISD::FSUB, MVT::f16, Promote);
648 setOperationAction(ISD::FMUL, MVT::f16, Promote);
649 setOperationAction(ISD::FDIV, MVT::f16, Promote);
650 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
651 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
652 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
653
654 setOperationAction(ISD::STRICT_FADD, MVT::f16, Promote);
655 setOperationAction(ISD::STRICT_FSUB, MVT::f16, Promote);
656 setOperationAction(ISD::STRICT_FMUL, MVT::f16, Promote);
657 setOperationAction(ISD::STRICT_FDIV, MVT::f16, Promote);
658 setOperationAction(ISD::STRICT_FMA, MVT::f16, Promote);
659 setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Promote);
660 setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Promote);
661 setOperationAction(ISD::STRICT_FMINIMUM, MVT::f16, Promote);
662 setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote);
663 setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote);
664 setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote);
665 setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote);
666 setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote);
667 setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote);
668 setOperationAction(ISD::STRICT_FEXP, MVT::f16, Promote);
669 setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote);
670 setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote);
671 setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote);
672 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote);
673 setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote);
674 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote);
675 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote);
676 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
677 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
678 setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
679 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
680 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
681 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
682
683 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
684 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
685
686 // Lower this to MOVMSK plus an AND.
687 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
688 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
689
690 } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
691 (UseX87 || Is64Bit)) {
692 // Use SSE for f32, x87 for f64.
693 // Set up the FP register classes.
694 addRegisterClass(MVT::f32, &X86::FR32RegClass);
695 if (UseX87)
696 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
697
698 // Use ANDPS to simulate FABS.
699 setOperationAction(ISD::FABS , MVT::f32, Custom);
700
701 // Use XORP to simulate FNEG.
702 setOperationAction(ISD::FNEG , MVT::f32, Custom);
703
704 if (UseX87)
705 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
706
707 // Use ANDPS and ORPS to simulate FCOPYSIGN.
708 if (UseX87)
709 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
710 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
711
712 // We don't support sin/cos/fmod
713 setOperationAction(ISD::FSIN , MVT::f32, Expand);
714 setOperationAction(ISD::FCOS , MVT::f32, Expand);
715 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
716
717 if (UseX87) {
718 // Always expand sin/cos functions even though x87 has an instruction.
719 setOperationAction(ISD::FSIN, MVT::f64, Expand);
720 setOperationAction(ISD::FCOS, MVT::f64, Expand);
721 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
722 }
723 } else if (UseX87) {
724 // f32 and f64 in x87.
725 // Set up the FP register classes.
726 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
727 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
728
729 for (auto VT : { MVT::f32, MVT::f64 }) {
730 setOperationAction(ISD::UNDEF, VT, Expand);
731 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
732
733 // Always expand sin/cos functions even though x87 has an instruction.
734 setOperationAction(ISD::FSIN , VT, Expand);
735 setOperationAction(ISD::FCOS , VT, Expand);
736 setOperationAction(ISD::FSINCOS, VT, Expand);
737 }
738 }
739
740 // Expand FP32 immediates into loads from the stack, save special cases.
741 if (isTypeLegal(MVT::f32)) {
742 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
743 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
744 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
745 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
746 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
747 } else // SSE immediates.
748 addLegalFPImmediate(APFloat(+0.0f)); // xorps
749 }
750 // Expand FP64 immediates into loads from the stack, save special cases.
751 if (isTypeLegal(MVT::f64)) {
752 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
753 addLegalFPImmediate(APFloat(+0.0)); // FLD0
754 addLegalFPImmediate(APFloat(+1.0)); // FLD1
755 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
756 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
757 } else // SSE immediates.
758 addLegalFPImmediate(APFloat(+0.0)); // xorpd
759 }
760 // Support fp16 0 immediate.
761 if (isTypeLegal(MVT::f16))
762 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
763
764 // Handle constrained floating-point operations of scalar.
765 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
766 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
767 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
768 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
769 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
770 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
771 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
772 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
773 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
774 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
775 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
776 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
777
778 // We don't support FMA.
779 setOperationAction(ISD::FMA, MVT::f64, Expand);
780 setOperationAction(ISD::FMA, MVT::f32, Expand);
781
782 // f80 always uses X87.
783 if (UseX87) {
784 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
785 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
786 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
787 {
788 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
789 addLegalFPImmediate(TmpFlt); // FLD0
790 TmpFlt.changeSign();
791 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
792
793 bool ignored;
794 APFloat TmpFlt2(+1.0);
795 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
796 &ignored);
797 addLegalFPImmediate(TmpFlt2); // FLD1
798 TmpFlt2.changeSign();
799 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
800 }
801
802 // Always expand sin/cos functions even though x87 has an instruction.
803 setOperationAction(ISD::FSIN , MVT::f80, Expand);
804 setOperationAction(ISD::FCOS , MVT::f80, Expand);
805 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
806
807 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
808 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
809 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
810 setOperationAction(ISD::FRINT, MVT::f80, Expand);
811 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
812 setOperationAction(ISD::FMA, MVT::f80, Expand);
813 setOperationAction(ISD::LROUND, MVT::f80, Expand);
814 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
815 setOperationAction(ISD::LRINT, MVT::f80, Custom);
816 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
817
818 // Handle constrained floating-point operations of scalar.
819 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
820 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
821 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
822 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
823 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
824 if (isTypeLegal(MVT::f16)) {
825 setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
826 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
827 } else {
828 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
829 }
830 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
831 // as Custom.
832 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
833 }
834
835 // f128 uses xmm registers, but most operations require libcalls.
836 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
837 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
838 : &X86::VR128RegClass);
839
840 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
841
842 setOperationAction(ISD::FADD, MVT::f128, LibCall);
843 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
844 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
845 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
846 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
847 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
848 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
849 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
850 setOperationAction(ISD::FMA, MVT::f128, LibCall);
851 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
852
853 setOperationAction(ISD::FABS, MVT::f128, Custom);
854 setOperationAction(ISD::FNEG, MVT::f128, Custom);
855 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
856
857 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
858 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
859 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
860 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
861 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
862 // No STRICT_FSINCOS
863 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
864 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
865
866 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
867 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
868 // We need to custom handle any FP_ROUND with an f128 input, but
869 // LegalizeDAG uses the result type to know when to run a custom handler.
870 // So we have to list all legal floating point result types here.
871 if (isTypeLegal(MVT::f32)) {
872 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
873 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
874 }
875 if (isTypeLegal(MVT::f64)) {
876 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
877 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
878 }
879 if (isTypeLegal(MVT::f80)) {
880 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
881 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
882 }
883
884 setOperationAction(ISD::SETCC, MVT::f128, Custom);
885
886 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
887 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
888 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
889 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
890 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
891 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
892 }
893
894 // Always use a library call for pow.
895 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
896 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
897 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
898 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
899
900 setOperationAction(ISD::FLOG, MVT::f80, Expand);
901 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
902 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
903 setOperationAction(ISD::FEXP, MVT::f80, Expand);
904 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
905 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
906 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
907
908 // Some FP actions are always expanded for vector types.
909 for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
910 MVT::v4f32, MVT::v8f32, MVT::v16f32,
911 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
912 setOperationAction(ISD::FSIN, VT, Expand);
913 setOperationAction(ISD::FSINCOS, VT, Expand);
914 setOperationAction(ISD::FCOS, VT, Expand);
915 setOperationAction(ISD::FREM, VT, Expand);
916 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
917 setOperationAction(ISD::FPOW, VT, Expand);
918 setOperationAction(ISD::FLOG, VT, Expand);
919 setOperationAction(ISD::FLOG2, VT, Expand);
920 setOperationAction(ISD::FLOG10, VT, Expand);
921 setOperationAction(ISD::FEXP, VT, Expand);
922 setOperationAction(ISD::FEXP2, VT, Expand);
923 }
924
925 // First set operation action for all vector types to either promote
926 // (for widening) or expand (for scalarization). Then we will selectively
927 // turn on ones that can be effectively codegen'd.
928 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
929 setOperationAction(ISD::SDIV, VT, Expand);
930 setOperationAction(ISD::UDIV, VT, Expand);
931 setOperationAction(ISD::SREM, VT, Expand);
932 setOperationAction(ISD::UREM, VT, Expand);
933 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
934 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
935 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
936 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
937 setOperationAction(ISD::FMA, VT, Expand);
938 setOperationAction(ISD::FFLOOR, VT, Expand);
939 setOperationAction(ISD::FCEIL, VT, Expand);
940 setOperationAction(ISD::FTRUNC, VT, Expand);
941 setOperationAction(ISD::FRINT, VT, Expand);
942 setOperationAction(ISD::FNEARBYINT, VT, Expand);
943 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
944 setOperationAction(ISD::MULHS, VT, Expand);
945 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
946 setOperationAction(ISD::MULHU, VT, Expand);
947 setOperationAction(ISD::SDIVREM, VT, Expand);
948 setOperationAction(ISD::UDIVREM, VT, Expand);
949 setOperationAction(ISD::CTPOP, VT, Expand);
950 setOperationAction(ISD::CTTZ, VT, Expand);
951 setOperationAction(ISD::CTLZ, VT, Expand);
952 setOperationAction(ISD::ROTL, VT, Expand);
953 setOperationAction(ISD::ROTR, VT, Expand);
954 setOperationAction(ISD::BSWAP, VT, Expand);
955 setOperationAction(ISD::SETCC, VT, Expand);
956 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
957 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
958 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
959 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
960 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
961 setOperationAction(ISD::TRUNCATE, VT, Expand);
962 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
963 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
964 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
965 setOperationAction(ISD::SELECT_CC, VT, Expand);
966 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
967 setTruncStoreAction(InnerVT, VT, Expand);
968
969 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
970 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
971
972 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
973 // types, we have to deal with them whether we ask for Expansion or not.
974 // Setting Expand causes its own optimisation problems though, so leave
975 // them legal.
976 if (VT.getVectorElementType() == MVT::i1)
977 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
978
979 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
980 // split/scalarized right now.
981 if (VT.getVectorElementType() == MVT::f16 ||
982 VT.getVectorElementType() == MVT::bf16)
983 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
984 }
985 }
986
987 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
988 // with -msoft-float, disable use of MMX as well.
989 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
990 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
991 // No operations on x86mmx supported, everything uses intrinsics.
992 }
993
994 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
995 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
996 : &X86::VR128RegClass);
997
998 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
999 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
1000 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
1001 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
1002 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
1003 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
1004 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
1005 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
1006
1007 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
1008 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
1009
1010 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1011 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1012 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1013 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1014 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1015 }
1016
1017 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
1018 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1019 : &X86::VR128RegClass);
1020
1021 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
1022 // registers cannot be used even for integer operations.
1023 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
1024 : &X86::VR128RegClass);
1025 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1026 : &X86::VR128RegClass);
1027 addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1028 : &X86::VR128RegClass);
1029 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1030 : &X86::VR128RegClass);
1031 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1032 : &X86::VR128RegClass);
1033
1034 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
1035 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
1036 setOperationAction(ISD::SDIV, VT, Custom);
1037 setOperationAction(ISD::SREM, VT, Custom);
1038 setOperationAction(ISD::UDIV, VT, Custom);
1039 setOperationAction(ISD::UREM, VT, Custom);
1040 }
1041
1042 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
1043 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
1044 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
1045
1046 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1047 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1048 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1049 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
1050 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
1051 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
1052 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
1053 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
1054 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
1055 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1056 setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
1057 setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
1058
1059 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
1060 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
1061 setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
1062
1063 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1064 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
1065 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
1066
1067 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1068 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
1069 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
1070 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
1071 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
1072 }
1073
1074 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
1075 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
1076 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
1077 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
1078 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
1079 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
1080 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
1081 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
1082 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
1083 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
1084
1085 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1086 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1087 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1088 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1089
1090 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1091 setOperationAction(ISD::SETCC, VT, Custom);
1092 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1093 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1094 setOperationAction(ISD::CTPOP, VT, Custom);
1095 setOperationAction(ISD::ABS, VT, Custom);
1096
1097 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1098 // setcc all the way to isel and prefer SETGT in some isel patterns.
1099 setCondCodeAction(ISD::SETLT, VT, Custom);
1100 setCondCodeAction(ISD::SETLE, VT, Custom);
1101 }
1102
1103 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1104 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1105 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1106 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1107 setOperationAction(ISD::VSELECT, VT, Custom);
1108 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1109 }
1110
1111 for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
1112 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1113 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1114 setOperationAction(ISD::VSELECT, VT, Custom);
1115
1116 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1117 continue;
1118
1119 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1120 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1121 }
1122 setF16Action(MVT::v8f16, Expand);
1123 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
1124 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
1125 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
1126 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
1127
1128 // Custom lower v2i64 and v2f64 selects.
1129 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1130 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1131 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
1132 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
1133 setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
1134 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
1135
1136 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
1137 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
1138 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1139 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1140 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
1141 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1142
1143 // Custom legalize these to avoid over promotion or custom promotion.
1144 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1145 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1146 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1147 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1148 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1149 }
1150
1151 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1152 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
1153 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1154 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1155
1156 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1157 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1158
1159 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1160 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1161
1162 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1163 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1164 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1165 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1166 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1167
1168 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1169 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1170 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1171 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1172
1173 // We want to legalize this to an f64 load rather than an i64 load on
1174 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1175 // store.
1176 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1177 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1178 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1179 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1180 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1181 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1182
1183 // Add 32-bit vector stores to help vectorization opportunities.
1184 setOperationAction(ISD::STORE, MVT::v2i16, Custom);
1185 setOperationAction(ISD::STORE, MVT::v4i8, Custom);
1186
1187 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1188 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1189 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1190 if (!Subtarget.hasAVX512())
1191 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1192
1193 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1194 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1195 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1196
1197 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1198
1199 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1200 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1201 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1202 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1203 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1204 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1205
1206 // In the customized shift lowering, the legal v4i32/v2i64 cases
1207 // in AVX2 will be recognized.
1208 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1209 setOperationAction(ISD::SRL, VT, Custom);
1210 setOperationAction(ISD::SHL, VT, Custom);
1211 setOperationAction(ISD::SRA, VT, Custom);
1212 if (VT == MVT::v2i64) continue;
1213 setOperationAction(ISD::ROTL, VT, Custom);
1214 setOperationAction(ISD::ROTR, VT, Custom);
1215 setOperationAction(ISD::FSHL, VT, Custom);
1216 setOperationAction(ISD::FSHR, VT, Custom);
1217 }
1218
1219 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1220 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1221 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1222 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1223 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1224 }
1225
1226 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1227 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1228 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1229 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1230 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1231 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1232 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1233 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1234 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1235
1236 // These might be better off as horizontal vector ops.
1237 setOperationAction(ISD::ADD, MVT::i16, Custom);
1238 setOperationAction(ISD::ADD, MVT::i32, Custom);
1239 setOperationAction(ISD::SUB, MVT::i16, Custom);
1240 setOperationAction(ISD::SUB, MVT::i32, Custom);
1241 }
1242
1243 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1244 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1245 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1246 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1247 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1248 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1249 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1250 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1251 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1252 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1253 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1254 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1255 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1256 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1257
1258 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1259 }
1260
1261 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1262 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1263 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1264 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1265 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1266 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1267 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1268 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1269
1270 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1271 setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
1272 setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
1273
1274 // FIXME: Do we need to handle scalar-to-vector here?
1275 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1276 setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
1277
1278 // We directly match byte blends in the backend as they match the VSELECT
1279 // condition form.
1280 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1281
1282 // SSE41 brings specific instructions for doing vector sign extend even in
1283 // cases where we don't have SRA.
1284 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1285 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1286 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1287 }
1288
1289 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1290 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1291 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1292 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1293 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1294 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1295 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1296 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1297 }
1298
1299 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1300 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1301 // do the pre and post work in the vector domain.
1302 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1303 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1304 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1305 // so that DAG combine doesn't try to turn it into uint_to_fp.
1306 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1307 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1308 }
1309 }
1310
1311 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1312 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1313 }
1314
1315 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1316 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1317 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1318 setOperationAction(ISD::ROTL, VT, Custom);
1319 setOperationAction(ISD::ROTR, VT, Custom);
1320 }
1321
1322 // XOP can efficiently perform BITREVERSE with VPPERM.
1323 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1324 setOperationAction(ISD::BITREVERSE, VT, Custom);
1325
1326 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1327 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1328 setOperationAction(ISD::BITREVERSE, VT, Custom);
1329 }
1330
1331 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1332 bool HasInt256 = Subtarget.hasInt256();
1333
1334 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1335 : &X86::VR256RegClass);
1336 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1337 : &X86::VR256RegClass);
1338 addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1339 : &X86::VR256RegClass);
1340 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1341 : &X86::VR256RegClass);
1342 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1343 : &X86::VR256RegClass);
1344 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1345 : &X86::VR256RegClass);
1346 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1347 : &X86::VR256RegClass);
1348
1349 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1350 setOperationAction(ISD::FFLOOR, VT, Legal);
1351 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1352 setOperationAction(ISD::FCEIL, VT, Legal);
1353 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1354 setOperationAction(ISD::FTRUNC, VT, Legal);
1355 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1356 setOperationAction(ISD::FRINT, VT, Legal);
1357 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1358 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1359 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1360 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1361 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1362
1363 setOperationAction(ISD::FROUND, VT, Custom);
1364
1365 setOperationAction(ISD::FNEG, VT, Custom);
1366 setOperationAction(ISD::FABS, VT, Custom);
1367 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1368 }
1369
1370 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1371 // even though v8i16 is a legal type.
1372 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1373 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1374 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1375 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1376 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
1377 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
1378 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
1379
1380 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
1381 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
1382 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
1383 setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
1384 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
1385 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
1386
1387 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1388 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1389 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1390 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1391 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1392 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1393 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1394 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1395 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1396 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1397 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1398
1399 if (!Subtarget.hasAVX512())
1400 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1401
1402 // In the customized shift lowering, the legal v8i32/v4i64 cases
1403 // in AVX2 will be recognized.
1404 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1405 setOperationAction(ISD::SRL, VT, Custom);
1406 setOperationAction(ISD::SHL, VT, Custom);
1407 setOperationAction(ISD::SRA, VT, Custom);
1408 if (VT == MVT::v4i64) continue;
1409 setOperationAction(ISD::ROTL, VT, Custom);
1410 setOperationAction(ISD::ROTR, VT, Custom);
1411 setOperationAction(ISD::FSHL, VT, Custom);
1412 setOperationAction(ISD::FSHR, VT, Custom);
1413 }
1414
1415 // These types need custom splitting if their input is a 128-bit vector.
1416 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1417 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1418 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1419 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1420
1421 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1422 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1423 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1424 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1425 setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
1426 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1427 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1428
1429 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1430 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1431 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1432 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1433 }
1434
1435 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1436 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1437 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1438 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1439
1440 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1441 setOperationAction(ISD::SETCC, VT, Custom);
1442 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1443 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1444 setOperationAction(ISD::CTPOP, VT, Custom);
1445 setOperationAction(ISD::CTLZ, VT, Custom);
1446
1447 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1448 // setcc all the way to isel and prefer SETGT in some isel patterns.
1449 setCondCodeAction(ISD::SETLT, VT, Custom);
1450 setCondCodeAction(ISD::SETLE, VT, Custom);
1451 }
1452
1453 if (Subtarget.hasAnyFMA()) {
1454 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1455 MVT::v2f64, MVT::v4f64 }) {
1456 setOperationAction(ISD::FMA, VT, Legal);
1457 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1458 }
1459 }
1460
1461 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1462 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1463 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1464 }
1465
1466 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1467 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1468 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1469 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1470
1471 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1472 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1473 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1474 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1475 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1476 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1477 setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
1478 setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
1479
1480 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1481 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1482
1483 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1484 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1485 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1486 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1487 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1488
1489 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1490 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1491 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1492 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1493 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1494 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1495 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1496 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1497 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1498 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1499 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1500 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1501
1502 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1503 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1504 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1505 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1506 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1507 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1508 }
1509
1510 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1511 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1512 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1513 }
1514
1515 if (HasInt256) {
1516 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1517 // when we have a 256bit-wide blend with immediate.
1518 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1519 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1520
1521 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1522 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1523 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1524 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1525 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1526 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1527 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1528 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1529 }
1530 }
1531
1532 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1533 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1534 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1535 setOperationAction(ISD::MSTORE, VT, Legal);
1536 }
1537
1538 // Extract subvector is special because the value type
1539 // (result) is 128-bit but the source is 256-bit wide.
1540 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1541 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1542 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1543 }
1544
1545 // Custom lower several nodes for 256-bit types.
1546 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1547 MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
1548 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1549 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1550 setOperationAction(ISD::VSELECT, VT, Custom);
1551 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1552 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1553 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1554 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1555 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1556 setOperationAction(ISD::STORE, VT, Custom);
1557 }
1558 setF16Action(MVT::v16f16, Expand);
1559 setOperationAction(ISD::FADD, MVT::v16f16, Expand);
1560 setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
1561 setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
1562 setOperationAction(ISD::FDIV, MVT::v16f16, Expand);
1563
1564 if (HasInt256) {
1565 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1566
1567 // Custom legalize 2x32 to get a little better code.
1568 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1569 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1570
1571 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1572 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1573 setOperationAction(ISD::MGATHER, VT, Custom);
1574 }
1575 }
1576
1577 if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
1578 Subtarget.hasF16C()) {
1579 for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
1580 setOperationAction(ISD::FP_ROUND, VT, Custom);
1581 setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1582 }
1583 for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) {
1584 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1585 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
1586 }
1587 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1588 setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
1589 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1590 }
1591
1592 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1593 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
1594 }
1595
1596 // This block controls legalization of the mask vector sizes that are
1597 // available with AVX512. 512-bit vectors are in a separate block controlled
1598 // by useAVX512Regs.
1599 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1600 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1601 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1602 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1603 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1604 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1605
1606 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1607 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1608 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1609
1610 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1611 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1612 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1613 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1614 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1615 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1616 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1617 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1618 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1619 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1620 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1621 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1622
1623 // There is no byte sized k-register load or store without AVX512DQ.
1624 if (!Subtarget.hasDQI()) {
1625 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1626 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1627 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1628 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1629
1630 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1631 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1632 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1633 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1634 }
1635
1636 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1637 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1638 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1639 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1640 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1641 }
1642
1643 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1644 setOperationAction(ISD::VSELECT, VT, Expand);
1645
1646 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1647 setOperationAction(ISD::SETCC, VT, Custom);
1648 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1649 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1650 setOperationAction(ISD::SELECT, VT, Custom);
1651 setOperationAction(ISD::TRUNCATE, VT, Custom);
1652
1653 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1654 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1655 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1656 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1657 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1658 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1659 }
1660
1661 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1662 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1663 }
1664
1665 // This block controls legalization for 512-bit operations with 32/64 bit
1666 // elements. 512-bits can be disabled based on prefer-vector-width and
1667 // required-vector-width function attributes.
1668 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1669 bool HasBWI = Subtarget.hasBWI();
1670
1671 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1672 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1673 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1674 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1675 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1676 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1677 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1678
1679 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1680 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1681 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1682 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1683 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1684 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1685 if (HasBWI)
1686 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1687 }
1688
1689 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1690 setOperationAction(ISD::FNEG, VT, Custom);
1691 setOperationAction(ISD::FABS, VT, Custom);
1692 setOperationAction(ISD::FMA, VT, Legal);
1693 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1694 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1695 }
1696
1697 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1698 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1699 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1700 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1701 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1702 }
1703 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom);
1704 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom);
1705 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom);
1706 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom);
1707 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
1708 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
1709 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
1710 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom);
1711 setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
1712 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
1713
1714 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1715 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1716 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1717 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1718 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1719 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1720 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1721 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1722 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1723 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1724 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1725
1726 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1727 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1728 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1729 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1730 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1731 if (HasBWI)
1732 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1733
1734 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1735 // to 512-bit rather than use the AVX2 instructions so that we can use
1736 // k-masks.
1737 if (!Subtarget.hasVLX()) {
1738 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1739 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1740 setOperationAction(ISD::MLOAD, VT, Custom);
1741 setOperationAction(ISD::MSTORE, VT, Custom);
1742 }
1743 }
1744
1745 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1746 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1747 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1748 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1749 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1750 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1751 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1752 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1753 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1754 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1755 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1756 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1757 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1758
1759 if (HasBWI) {
1760 // Extends from v64i1 masks to 512-bit vectors.
1761 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1762 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1763 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1764 }
1765
1766 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1767 setOperationAction(ISD::FFLOOR, VT, Legal);
1768 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1769 setOperationAction(ISD::FCEIL, VT, Legal);
1770 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1771 setOperationAction(ISD::FTRUNC, VT, Legal);
1772 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1773 setOperationAction(ISD::FRINT, VT, Legal);
1774 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1775 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1776 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1777 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1778 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1779
1780 setOperationAction(ISD::FROUND, VT, Custom);
1781 }
1782
1783 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1784 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1785 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1786 }
1787
1788 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1789 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1790 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1791 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1792
1793 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1794 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1795 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1796 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1797
1798 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1799 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1800 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1801 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1802 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1803 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1804 setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
1805 setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
1806
1807 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1808 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1809
1810 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1811
1812 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1813 setOperationAction(ISD::SRL, VT, Custom);
1814 setOperationAction(ISD::SHL, VT, Custom);
1815 setOperationAction(ISD::SRA, VT, Custom);
1816 setOperationAction(ISD::ROTL, VT, Custom);
1817 setOperationAction(ISD::ROTR, VT, Custom);
1818 setOperationAction(ISD::SETCC, VT, Custom);
1819
1820 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1821 // setcc all the way to isel and prefer SETGT in some isel patterns.
1822 setCondCodeAction(ISD::SETLT, VT, Custom);
1823 setCondCodeAction(ISD::SETLE, VT, Custom);
1824 }
1825 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1826 setOperationAction(ISD::SMAX, VT, Legal);
1827 setOperationAction(ISD::UMAX, VT, Legal);
1828 setOperationAction(ISD::SMIN, VT, Legal);
1829 setOperationAction(ISD::UMIN, VT, Legal);
1830 setOperationAction(ISD::ABS, VT, Legal);
1831 setOperationAction(ISD::CTPOP, VT, Custom);
1832 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1833 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1834 }
1835
1836 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1837 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1838 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1839 setOperationAction(ISD::CTLZ, VT, Custom);
1840 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1841 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1842 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1843 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1844 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1845 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1846 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1847 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1848 }
1849
1850 setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
1851 setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
1852 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1853 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1854 setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
1855 setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
1856
1857 if (Subtarget.hasDQI()) {
1858 for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
1859 ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1860 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
1861 setOperationAction(Opc, MVT::v8i64, Custom);
1862 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1863 }
1864
1865 if (Subtarget.hasCDI()) {
1866 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1867 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1868 setOperationAction(ISD::CTLZ, VT, Legal);
1869 }
1870 } // Subtarget.hasCDI()
1871
1872 if (Subtarget.hasVPOPCNTDQ()) {
1873 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1874 setOperationAction(ISD::CTPOP, VT, Legal);
1875 }
1876
1877 // Extract subvector is special because the value type
1878 // (result) is 256-bit but the source is 512-bit wide.
1879 // 128-bit was made Legal under AVX1.
1880 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1881 MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1882 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1883
1884 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1885 MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
1886 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1887 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1888 setOperationAction(ISD::SELECT, VT, Custom);
1889 setOperationAction(ISD::VSELECT, VT, Custom);
1890 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1891 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1892 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1893 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1894 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1895 }
1896 setF16Action(MVT::v32f16, Expand);
1897 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom);
1898 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom);
1899 setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
1900 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
1901 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1902 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1903 setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
1904 }
1905
1906 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1907 setOperationAction(ISD::MLOAD, VT, Legal);
1908 setOperationAction(ISD::MSTORE, VT, Legal);
1909 setOperationAction(ISD::MGATHER, VT, Custom);
1910 setOperationAction(ISD::MSCATTER, VT, Custom);
1911 }
1912 if (HasBWI) {
1913 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1914 setOperationAction(ISD::MLOAD, VT, Legal);
1915 setOperationAction(ISD::MSTORE, VT, Legal);
1916 }
1917 } else {
1918 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1919 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1920 }
1921
1922 if (Subtarget.hasVBMI2()) {
1923 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1924 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1925 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1926 setOperationAction(ISD::FSHL, VT, Custom);
1927 setOperationAction(ISD::FSHR, VT, Custom);
1928 }
1929
1930 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1931 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1932 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1933 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1934 }
1935 }// useAVX512Regs
1936
1937 // This block controls legalization for operations that don't have
1938 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1939 // narrower widths.
1940 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1941 // These operations are handled on non-VLX by artificially widening in
1942 // isel patterns.
1943
1944 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
1945 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
1946 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1947
1948 if (Subtarget.hasDQI()) {
1949 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1950 // v2f32 UINT_TO_FP is already custom under SSE2.
1951 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1953, __extension__
__PRETTY_FUNCTION__))
1952 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1953, __extension__
__PRETTY_FUNCTION__))
1953 "Unexpected operation action!")(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1953, __extension__
__PRETTY_FUNCTION__))
;
1954 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1955 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1956 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1957 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1958 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1959 }
1960
1961 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1962 setOperationAction(ISD::SMAX, VT, Legal);
1963 setOperationAction(ISD::UMAX, VT, Legal);
1964 setOperationAction(ISD::SMIN, VT, Legal);
1965 setOperationAction(ISD::UMIN, VT, Legal);
1966 setOperationAction(ISD::ABS, VT, Legal);
1967 }
1968
1969 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1970 setOperationAction(ISD::ROTL, VT, Custom);
1971 setOperationAction(ISD::ROTR, VT, Custom);
1972 }
1973
1974 // Custom legalize 2x32 to get a little better code.
1975 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1976 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1977
1978 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1979 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1980 setOperationAction(ISD::MSCATTER, VT, Custom);
1981
1982 if (Subtarget.hasDQI()) {
1983 for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
1984 ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1985 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) {
1986 setOperationAction(Opc, MVT::v2i64, Custom);
1987 setOperationAction(Opc, MVT::v4i64, Custom);
1988 }
1989 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1990 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1991 }
1992
1993 if (Subtarget.hasCDI()) {
1994 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1995 setOperationAction(ISD::CTLZ, VT, Legal);
1996 }
1997 } // Subtarget.hasCDI()
1998
1999 if (Subtarget.hasVPOPCNTDQ()) {
2000 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
2001 setOperationAction(ISD::CTPOP, VT, Legal);
2002 }
2003 }
2004
2005 // This block controls legalization of v32i1/v64i1 which are available with
2006 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
2007 // useBWIRegs.
2008 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
2009 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
2010 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
2011
2012 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
2013 setOperationAction(ISD::VSELECT, VT, Expand);
2014 setOperationAction(ISD::TRUNCATE, VT, Custom);
2015 setOperationAction(ISD::SETCC, VT, Custom);
2016 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
2017 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
2018 setOperationAction(ISD::SELECT, VT, Custom);
2019 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2020 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2021 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
2022 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
2023 }
2024
2025 for (auto VT : { MVT::v16i1, MVT::v32i1 })
2026 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
2027
2028 // Extends from v32i1 masks to 256-bit vectors.
2029 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
2030 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
2031 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
2032
2033 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
2034 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
2035 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
2036 }
2037
2038 // These operations are handled on non-VLX by artificially widening in
2039 // isel patterns.
2040 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
2041
2042 if (Subtarget.hasBITALG()) {
2043 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
2044 setOperationAction(ISD::CTPOP, VT, Legal);
2045 }
2046 }
2047
2048 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
2049 auto setGroup = [&] (MVT VT) {
2050 setOperationAction(ISD::FADD, VT, Legal);
2051 setOperationAction(ISD::STRICT_FADD, VT, Legal);
2052 setOperationAction(ISD::FSUB, VT, Legal);
2053 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
2054 setOperationAction(ISD::FMUL, VT, Legal);
2055 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
2056 setOperationAction(ISD::FDIV, VT, Legal);
2057 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
2058 setOperationAction(ISD::FSQRT, VT, Legal);
2059 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
2060
2061 setOperationAction(ISD::FFLOOR, VT, Legal);
2062 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
2063 setOperationAction(ISD::FCEIL, VT, Legal);
2064 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
2065 setOperationAction(ISD::FTRUNC, VT, Legal);
2066 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
2067 setOperationAction(ISD::FRINT, VT, Legal);
2068 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
2069 setOperationAction(ISD::FNEARBYINT, VT, Legal);
2070 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
2071
2072 setOperationAction(ISD::LOAD, VT, Legal);
2073 setOperationAction(ISD::STORE, VT, Legal);
2074
2075 setOperationAction(ISD::FMA, VT, Legal);
2076 setOperationAction(ISD::STRICT_FMA, VT, Legal);
2077 setOperationAction(ISD::VSELECT, VT, Legal);
2078 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2079 setOperationAction(ISD::SELECT, VT, Custom);
2080
2081 setOperationAction(ISD::FNEG, VT, Custom);
2082 setOperationAction(ISD::FABS, VT, Custom);
2083 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
2084 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
2085 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2086 };
2087
2088 // AVX512_FP16 scalar operations
2089 setGroup(MVT::f16);
2090 setOperationAction(ISD::FREM, MVT::f16, Promote);
2091 setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
2092 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
2093 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
2094 setOperationAction(ISD::SETCC, MVT::f16, Custom);
2095 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
2096 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
2097 setOperationAction(ISD::FROUND, MVT::f16, Custom);
2098 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
2099 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
2100 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
2101 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
2102 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
2103 setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
2104 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
2105
2106 setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
2107 setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
2108
2109 if (Subtarget.useAVX512Regs()) {
2110 setGroup(MVT::v32f16);
2111 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
2112 setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
2113 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
2114 setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
2115 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
2116 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
2117 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
2118 setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
2119 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
2120 setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal);
2121 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
2122 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
2123
2124 setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
2125 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
2126 setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
2127 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
2128 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
2129 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
2130 MVT::v32i16);
2131 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
2132 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
2133 MVT::v32i16);
2134 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
2135 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
2136 MVT::v32i16);
2137 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
2138 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
2139 MVT::v32i16);
2140
2141 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
2142 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
2143 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
2144
2145 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
2146 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2147
2148 setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
2149 setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
2150 }
2151
2152 if (Subtarget.hasVLX()) {
2153 setGroup(MVT::v8f16);
2154 setGroup(MVT::v16f16);
2155
2156 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
2157 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
2158 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
2159 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
2160 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
2161 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
2162 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
2163 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
2164 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
2165 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
2166
2167 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
2168 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
2169 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
2170 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
2171 setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
2172 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
2173 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
2174 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
2175 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
2176 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
2177
2178 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2179 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
2180 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
2181
2182 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
2183 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
2184 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
2185
2186 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2187 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2188 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2189 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2190
2191 // Need to custom widen these to prevent scalarization.
2192 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2193 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2194 }
2195 }
2196
2197 if (!Subtarget.useSoftFloat() &&
2198 (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
2199 addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
2200 addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
2201 // We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
2202 // provide the method to promote BUILD_VECTOR. Set the operation action
2203 // Custom to do the customization later.
2204 setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
2205 for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2206 setF16Action(VT, Expand);
2207 setOperationAction(ISD::FADD, VT, Expand);
2208 setOperationAction(ISD::FSUB, VT, Expand);
2209 setOperationAction(ISD::FMUL, VT, Expand);
2210 setOperationAction(ISD::FDIV, VT, Expand);
2211 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2212 }
2213 addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
2214 }
2215
2216 if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
2217 addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
2218 setF16Action(MVT::v32bf16, Expand);
2219 setOperationAction(ISD::FADD, MVT::v32bf16, Expand);
2220 setOperationAction(ISD::FSUB, MVT::v32bf16, Expand);
2221 setOperationAction(ISD::FMUL, MVT::v32bf16, Expand);
2222 setOperationAction(ISD::FDIV, MVT::v32bf16, Expand);
2223 setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom);
2224 }
2225
2226 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2227 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2228 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2229 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2230 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2231 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2232
2233 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2234 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2235 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2236 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2237 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2238
2239 if (Subtarget.hasBWI()) {
2240 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2241 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2242 }
2243
2244 if (Subtarget.hasFP16()) {
2245 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2246 setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
2247 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
2248 setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
2249 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
2250 setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
2251 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
2252 setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
2253 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
2254 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2255 setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
2256 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
2257 setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
2258 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
2259 setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
2260 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
2261 setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
2262 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
2263 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2264 setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
2265 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
2266 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
2267 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
2268 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2269 setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
2270 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
2271 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
2272 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
2273 }
2274
2275 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
2276 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
2277 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
2278 }
2279
2280 if (Subtarget.hasAMXTILE()) {
2281 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2282 }
2283
2284 // We want to custom lower some of our intrinsics.
2285 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
2286 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
2287 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
2288 if (!Subtarget.is64Bit()) {
2289 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
2290 }
2291
2292 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2293 // handle type legalization for these operations here.
2294 //
2295 // FIXME: We really should do custom legalization for addition and
2296 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2297 // than generic legalization for 64-bit multiplication-with-overflow, though.
2298 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2299 if (VT == MVT::i64 && !Subtarget.is64Bit())
2300 continue;
2301 // Add/Sub/Mul with overflow operations are custom lowered.
2302 setOperationAction(ISD::SADDO, VT, Custom);
2303 setOperationAction(ISD::UADDO, VT, Custom);
2304 setOperationAction(ISD::SSUBO, VT, Custom);
2305 setOperationAction(ISD::USUBO, VT, Custom);
2306 setOperationAction(ISD::SMULO, VT, Custom);
2307 setOperationAction(ISD::UMULO, VT, Custom);
2308
2309 // Support carry in as value rather than glue.
2310 setOperationAction(ISD::ADDCARRY, VT, Custom);
2311 setOperationAction(ISD::SUBCARRY, VT, Custom);
2312 setOperationAction(ISD::SETCCCARRY, VT, Custom);
2313 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
2314 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
2315 }
2316
2317 if (!Subtarget.is64Bit()) {
2318 // These libcalls are not available in 32-bit.
2319 setLibcallName(RTLIB::SHL_I128, nullptr);
2320 setLibcallName(RTLIB::SRL_I128, nullptr);
2321 setLibcallName(RTLIB::SRA_I128, nullptr);
2322 setLibcallName(RTLIB::MUL_I128, nullptr);
2323 // The MULO libcall is not part of libgcc, only compiler-rt.
2324 setLibcallName(RTLIB::MULO_I64, nullptr);
2325 }
2326 // The MULO libcall is not part of libgcc, only compiler-rt.
2327 setLibcallName(RTLIB::MULO_I128, nullptr);
2328
2329 // Combine sin / cos into _sincos_stret if it is available.
2330 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2331 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2332 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
2333 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
2334 }
2335
2336 if (Subtarget.isTargetWin64()) {
2337 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2338 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2339 setOperationAction(ISD::SREM, MVT::i128, Custom);
2340 setOperationAction(ISD::UREM, MVT::i128, Custom);
2341 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
2342 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
2343 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
2344 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
2345 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
2346 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
2347 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
2348 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
2349 }
2350
2351 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2352 // is. We should promote the value to 64-bits to solve this.
2353 // This is what the CRT headers do - `fmodf` is an inline header
2354 // function casting to f64 and calling `fmod`.
2355 if (Subtarget.is32Bit() &&
2356 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2357 for (ISD::NodeType Op :
2358 {ISD::FCEIL, ISD::STRICT_FCEIL,
2359 ISD::FCOS, ISD::STRICT_FCOS,
2360 ISD::FEXP, ISD::STRICT_FEXP,
2361 ISD::FFLOOR, ISD::STRICT_FFLOOR,
2362 ISD::FREM, ISD::STRICT_FREM,
2363 ISD::FLOG, ISD::STRICT_FLOG,
2364 ISD::FLOG10, ISD::STRICT_FLOG10,
2365 ISD::FPOW, ISD::STRICT_FPOW,
2366 ISD::FSIN, ISD::STRICT_FSIN})
2367 if (isOperationExpand(Op, MVT::f32))
2368 setOperationAction(Op, MVT::f32, Promote);
2369
2370 // We have target-specific dag combine patterns for the following nodes:
2371 setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
2372 ISD::SCALAR_TO_VECTOR,
2373 ISD::INSERT_VECTOR_ELT,
2374 ISD::EXTRACT_VECTOR_ELT,
2375 ISD::CONCAT_VECTORS,
2376 ISD::INSERT_SUBVECTOR,
2377 ISD::EXTRACT_SUBVECTOR,
2378 ISD::BITCAST,
2379 ISD::VSELECT,
2380 ISD::SELECT,
2381 ISD::SHL,
2382 ISD::SRA,
2383 ISD::SRL,
2384 ISD::OR,
2385 ISD::AND,
2386 ISD::ADD,
2387 ISD::FADD,
2388 ISD::FSUB,
2389 ISD::FNEG,
2390 ISD::FMA,
2391 ISD::STRICT_FMA,
2392 ISD::FMINNUM,
2393 ISD::FMAXNUM,
2394 ISD::SUB,
2395 ISD::LOAD,
2396 ISD::MLOAD,
2397 ISD::STORE,
2398 ISD::MSTORE,
2399 ISD::TRUNCATE,
2400 ISD::ZERO_EXTEND,
2401 ISD::ANY_EXTEND,
2402 ISD::SIGN_EXTEND,
2403 ISD::SIGN_EXTEND_INREG,
2404 ISD::ANY_EXTEND_VECTOR_INREG,
2405 ISD::SIGN_EXTEND_VECTOR_INREG,
2406 ISD::ZERO_EXTEND_VECTOR_INREG,
2407 ISD::SINT_TO_FP,
2408 ISD::UINT_TO_FP,
2409 ISD::STRICT_SINT_TO_FP,
2410 ISD::STRICT_UINT_TO_FP,
2411 ISD::SETCC,
2412 ISD::MUL,
2413 ISD::XOR,
2414 ISD::MSCATTER,
2415 ISD::MGATHER,
2416 ISD::FP16_TO_FP,
2417 ISD::FP_EXTEND,
2418 ISD::STRICT_FP_EXTEND,
2419 ISD::FP_ROUND,
2420 ISD::STRICT_FP_ROUND});
2421
2422 computeRegisterProperties(Subtarget.getRegisterInfo());
2423
2424 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2425 MaxStoresPerMemsetOptSize = 8;
2426 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2427 MaxStoresPerMemcpyOptSize = 4;
2428 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2429 MaxStoresPerMemmoveOptSize = 4;
2430
2431 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2432 // that needs to be benchmarked and balanced with the potential use of vector
2433 // load/store types (PR33329, PR33914).
2434 MaxLoadsPerMemcmp = 2;
2435 MaxLoadsPerMemcmpOptSize = 2;
2436
2437 // Default loop alignment, which can be overridden by -align-loops.
2438 setPrefLoopAlignment(Align(16));
2439
2440 // An out-of-order CPU can speculatively execute past a predictable branch,
2441 // but a conditional move could be stalled by an expensive earlier operation.
2442 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2443 EnableExtLdPromotion = true;
2444 setPrefFunctionAlignment(Align(16));
2445
2446 verifyIntrinsicTables();
2447
2448 // Default to having -disable-strictnode-mutation on
2449 IsStrictFPEnabled = true;
2450}
2451
2452// This has so far only been implemented for 64-bit MachO.
2453bool X86TargetLowering::useLoadStackGuardNode() const {
2454 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2455}
2456
2457bool X86TargetLowering::useStackGuardXorFP() const {
2458 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2459 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2460}
2461
2462SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2463 const SDLoc &DL) const {
2464 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2465 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2466 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2467 return SDValue(Node, 0);
2468}
2469
2470TargetLoweringBase::LegalizeTypeAction
2471X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2472 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2473 !Subtarget.hasBWI())
2474 return TypeSplitVector;
2475
2476 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2477 !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)
2478 return TypeSplitVector;
2479
2480 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2481 VT.getVectorElementType() != MVT::i1)
2482 return TypeWidenVector;
2483
2484 return TargetLoweringBase::getPreferredVectorAction(VT);
2485}
2486
2487static std::pair<MVT, unsigned>
2488handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2489 const X86Subtarget &Subtarget) {
2490 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2491 // convention is one that uses k registers.
2492 if (NumElts == 2)
2493 return {MVT::v2i64, 1};
2494 if (NumElts == 4)
2495 return {MVT::v4i32, 1};
2496 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2497 CC != CallingConv::Intel_OCL_BI)
2498 return {MVT::v8i16, 1};
2499 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2500 CC != CallingConv::Intel_OCL_BI)
2501 return {MVT::v16i8, 1};
2502 // v32i1 passes in ymm unless we have BWI and the calling convention is
2503 // regcall.
2504 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2505 return {MVT::v32i8, 1};
2506 // Split v64i1 vectors if we don't have v64i8 available.
2507 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2508 if (Subtarget.useAVX512Regs())
2509 return {MVT::v64i8, 1};
2510 return {MVT::v32i8, 2};
2511 }
2512
2513 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2514 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2515 NumElts > 64)
2516 return {MVT::i8, NumElts};
2517
2518 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2519}
2520
2521MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2522 CallingConv::ID CC,
2523 EVT VT) const {
2524 if (VT.isVector()) {
2525 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
2526 unsigned NumElts = VT.getVectorNumElements();
2527
2528 MVT RegisterVT;
2529 unsigned NumRegisters;
2530 std::tie(RegisterVT, NumRegisters) =
2531 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2532 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2533 return RegisterVT;
2534 }
2535
2536 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
2537 return MVT::v8f16;
2538 }
2539
2540 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
2541 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
2542 !Subtarget.hasX87())
2543 return MVT::i32;
2544
2545 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
2546 return getRegisterTypeForCallingConv(Context, CC,
2547 VT.changeVectorElementTypeToInteger());
2548
2549 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2550}
2551
2552unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2553 CallingConv::ID CC,
2554 EVT VT) const {
2555 if (VT.isVector()) {
2556 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
2557 unsigned NumElts = VT.getVectorNumElements();
2558
2559 MVT RegisterVT;
2560 unsigned NumRegisters;
2561 std::tie(RegisterVT, NumRegisters) =
2562 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2563 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2564 return NumRegisters;
2565 }
2566
2567 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
2568 return 1;
2569 }
2570
2571 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
2572 // x87 is disabled.
2573 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
2574 if (VT == MVT::f64)
2575 return 2;
2576 if (VT == MVT::f80)
2577 return 3;
2578 }
2579
2580 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
2581 return getNumRegistersForCallingConv(Context, CC,
2582 VT.changeVectorElementTypeToInteger());
2583
2584 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2585}
2586
2587unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2588 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2589 unsigned &NumIntermediates, MVT &RegisterVT) const {
2590 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2591 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2592 Subtarget.hasAVX512() &&
2593 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2594 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2595 VT.getVectorNumElements() > 64)) {
2596 RegisterVT = MVT::i8;
2597 IntermediateVT = MVT::i1;
2598 NumIntermediates = VT.getVectorNumElements();
2599 return NumIntermediates;
2600 }
2601
2602 // Split v64i1 vectors if we don't have v64i8 available.
2603 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2604 CC != CallingConv::X86_RegCall) {
2605 RegisterVT = MVT::v32i8;
2606 IntermediateVT = MVT::v32i1;
2607 NumIntermediates = 2;
2608 return 2;
2609 }
2610
2611 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2612 NumIntermediates, RegisterVT);
2613}
2614
2615EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2616 LLVMContext& Context,
2617 EVT VT) const {
2618 if (!VT.isVector())
2619 return MVT::i8;
2620
2621 if (Subtarget.hasAVX512()) {
2622 // Figure out what this type will be legalized to.
2623 EVT LegalVT = VT;
2624 while (getTypeAction(Context, LegalVT) != TypeLegal)
2625 LegalVT = getTypeToTransformTo(Context, LegalVT);
2626
2627 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2628 if (LegalVT.getSimpleVT().is512BitVector())
2629 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2630
2631 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2632 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2633 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2634 // vXi16/vXi8.
2635 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2636 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2637 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2638 }
2639 }
2640
2641 return VT.changeVectorElementTypeToInteger();
2642}
2643
2644/// Helper for getByValTypeAlignment to determine
2645/// the desired ByVal argument alignment.
2646static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2647 if (MaxAlign == 16)
2648 return;
2649 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2650 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
2651 MaxAlign = Align(16);
2652 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2653 Align EltAlign;
2654 getMaxByValAlign(ATy->getElementType(), EltAlign);
2655 if (EltAlign > MaxAlign)
2656 MaxAlign = EltAlign;
2657 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2658 for (auto *EltTy : STy->elements()) {
2659 Align EltAlign;
2660 getMaxByValAlign(EltTy, EltAlign);
2661 if (EltAlign > MaxAlign)
2662 MaxAlign = EltAlign;
2663 if (MaxAlign == 16)
2664 break;
2665 }
2666 }
2667}
2668
2669/// Return the desired alignment for ByVal aggregate
2670/// function arguments in the caller parameter area. For X86, aggregates
2671/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2672/// are at 4-byte boundaries.
2673uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
2674 const DataLayout &DL) const {
2675 if (Subtarget.is64Bit()) {
2676 // Max of 8 and alignment of type.
2677 Align TyAlign = DL.getABITypeAlign(Ty);
2678 if (TyAlign > 8)
2679 return TyAlign.value();
2680 return 8;
2681 }
2682
2683 Align Alignment(4);
2684 if (Subtarget.hasSSE1())
2685 getMaxByValAlign(Ty, Alignment);
2686 return Alignment.value();
2687}
2688
2689/// It returns EVT::Other if the type should be determined using generic
2690/// target-independent logic.
2691/// For vector ops we check that the overall size isn't larger than our
2692/// preferred vector width.
2693EVT X86TargetLowering::getOptimalMemOpType(
2694 const MemOp &Op, const AttributeList &FuncAttributes) const {
2695 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
2696 if (Op.size() >= 16 &&
2697 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2698 // FIXME: Check if unaligned 64-byte accesses are slow.
2699 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2700 (Subtarget.getPreferVectorWidth() >= 512)) {
2701 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2702 }
2703 // FIXME: Check if unaligned 32-byte accesses are slow.
2704 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2705 Subtarget.useLight256BitInstructions()) {
2706 // Although this isn't a well-supported type for AVX1, we'll let
2707 // legalization and shuffle lowering produce the optimal codegen. If we
2708 // choose an optimal type with a vector element larger than a byte,
2709 // getMemsetStores() may create an intermediate splat (using an integer
2710 // multiply) before we splat as a vector.
2711 return MVT::v32i8;
2712 }
2713 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2714 return MVT::v16i8;
2715 // TODO: Can SSE1 handle a byte vector?
2716 // If we have SSE1 registers we should be able to use them.
2717 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2718 (Subtarget.getPreferVectorWidth() >= 128))
2719 return MVT::v4f32;
2720 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2721 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2722 // Do not use f64 to lower memcpy if source is string constant. It's
2723 // better to use i32 to avoid the loads.
2724 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2725 // The gymnastics of splatting a byte value into an XMM register and then
2726 // only using 8-byte stores (because this is a CPU with slow unaligned
2727 // 16-byte accesses) makes that a loser.
2728 return MVT::f64;
2729 }
2730 }
2731 // This is a compromise. If we reach here, unaligned accesses may be slow on
2732 // this target. However, creating smaller, aligned accesses could be even
2733 // slower and would certainly be a lot more code.
2734 if (Subtarget.is64Bit() && Op.size() >= 8)
2735 return MVT::i64;
2736 return MVT::i32;
2737}
2738
2739bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2740 if (VT == MVT::f32)
2741 return Subtarget.hasSSE1();
2742 if (VT == MVT::f64)
2743 return Subtarget.hasSSE2();
2744 return true;
2745}
2746
2747static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
2748 return (8 * Alignment.value()) % SizeInBits == 0;
2749}
2750
2751bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
2752 if (isBitAligned(Alignment, VT.getSizeInBits()))
2753 return true;
2754 switch (VT.getSizeInBits()) {
2755 default:
2756 // 8-byte and under are always assumed to be fast.
2757 return true;
2758 case 128:
2759 return !Subtarget.isUnalignedMem16Slow();
2760 case 256:
2761 return !Subtarget.isUnalignedMem32Slow();
2762 // TODO: What about AVX-512 (512-bit) accesses?
2763 }
2764}
2765
2766bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2767 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
2768 unsigned *Fast) const {
2769 if (Fast)
2770 *Fast = isMemoryAccessFast(VT, Alignment);
2771 // NonTemporal vector memory ops must be aligned.
2772 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2773 // NT loads can only be vector aligned, so if its less aligned than the
2774 // minimum vector size (which we can split the vector down to), we might as
2775 // well use a regular unaligned vector load.
2776 // We don't have any NT loads pre-SSE41.
2777 if (!!(Flags & MachineMemOperand::MOLoad))
2778 return (Alignment < 16 || !Subtarget.hasSSE41());
2779 return false;
2780 }
2781 // Misaligned accesses of any size are always allowed.
2782 return true;
2783}
2784
2785bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
2786 const DataLayout &DL, EVT VT,
2787 unsigned AddrSpace, Align Alignment,
2788 MachineMemOperand::Flags Flags,
2789 unsigned *Fast) const {
2790 if (Fast)
2791 *Fast = isMemoryAccessFast(VT, Alignment);
2792 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2793 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
2794 /*Fast=*/nullptr))
2795 return true;
2796 // NonTemporal vector memory ops are special, and must be aligned.
2797 if (!isBitAligned(Alignment, VT.getSizeInBits()))
2798 return false;
2799 switch (VT.getSizeInBits()) {
2800 case 128:
2801 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
2802 return true;
2803 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
2804 return true;
2805 return false;
2806 case 256:
2807 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
2808 return true;
2809 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
2810 return true;
2811 return false;
2812 case 512:
2813 if (Subtarget.hasAVX512())
2814 return true;
2815 return false;
2816 default:
2817 return false; // Don't have NonTemporal vector memory ops of this size.
2818 }
2819 }
2820 return true;
2821}
2822
2823/// Return the entry encoding for a jump table in the
2824/// current function. The returned value is a member of the
2825/// MachineJumpTableInfo::JTEntryKind enum.
2826unsigned X86TargetLowering::getJumpTableEncoding() const {
2827 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2828 // symbol.
2829 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2830 return MachineJumpTableInfo::EK_Custom32;
2831
2832 // Otherwise, use the normal jump table encoding heuristics.
2833 return TargetLowering::getJumpTableEncoding();
2834}
2835
2836bool X86TargetLowering::splitValueIntoRegisterParts(
2837 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
2838 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
2839 bool IsABIRegCopy = CC.has_value();
2840 EVT ValueVT = Val.getValueType();
2841 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
2842 unsigned ValueBits = ValueVT.getSizeInBits();
2843 unsigned PartBits = PartVT.getSizeInBits();
2844 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
2845 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
2846 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
2847 Parts[0] = Val;
2848 return true;
2849 }
2850 return false;
2851}
2852
2853SDValue X86TargetLowering::joinRegisterPartsIntoValue(
2854 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
2855 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
2856 bool IsABIRegCopy = CC.has_value();
2857 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
2858 unsigned ValueBits = ValueVT.getSizeInBits();
2859 unsigned PartBits = PartVT.getSizeInBits();
2860 SDValue Val = Parts[0];
2861
2862 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
2863 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
2864 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
2865 return Val;
2866 }
2867 return SDValue();
2868}
2869
2870bool X86TargetLowering::useSoftFloat() const {
2871 return Subtarget.useSoftFloat();
2872}
2873
2874void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2875 ArgListTy &Args) const {
2876
2877 // Only relabel X86-32 for C / Stdcall CCs.
2878 if (Subtarget.is64Bit())
2879 return;
2880 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2881 return;
2882 unsigned ParamRegs = 0;
2883 if (auto *M = MF->getFunction().getParent())
2884 ParamRegs = M->getNumberRegisterParameters();
2885
2886 // Mark the first N int arguments as having reg
2887 for (auto &Arg : Args) {
2888 Type *T = Arg.Ty;
2889 if (T->isIntOrPtrTy())
2890 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2891 unsigned numRegs = 1;
2892 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2893 numRegs = 2;
2894 if (ParamRegs < numRegs)
2895 return;
2896 ParamRegs -= numRegs;
2897 Arg.IsInReg = true;
2898 }
2899 }
2900}
2901
2902const MCExpr *
2903X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2904 const MachineBasicBlock *MBB,
2905 unsigned uid,MCContext &Ctx) const{
2906 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())(static_cast <bool> (isPositionIndependent() &&
Subtarget.isPICStyleGOT()) ? void (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2906, __extension__
__PRETTY_FUNCTION__))
;
2907 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2908 // entries.
2909 return MCSymbolRefExpr::create(MBB->getSymbol(),
2910 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2911}
2912
2913/// Returns relocation base for the given PIC jumptable.
2914SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2915 SelectionDAG &DAG) const {
2916 if (!Subtarget.is64Bit())
2917 // This doesn't have SDLoc associated with it, but is not really the
2918 // same as a Register.
2919 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2920 getPointerTy(DAG.getDataLayout()));
2921 return Table;
2922}
2923
2924/// This returns the relocation base for the given PIC jumptable,
2925/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2926const MCExpr *X86TargetLowering::
2927getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2928 MCContext &Ctx) const {
2929 // X86-64 uses RIP relative addressing based on the jump table label.
2930 if (Subtarget.isPICStyleRIPRel())
2931 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2932
2933 // Otherwise, the reference is relative to the PIC base.
2934 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2935}
2936
2937std::pair<const TargetRegisterClass *, uint8_t>
2938X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2939 MVT VT) const {
2940 const TargetRegisterClass *RRC = nullptr;
2941 uint8_t Cost = 1;
2942 switch (VT.SimpleTy) {
2943 default:
2944 return TargetLowering::findRepresentativeClass(TRI, VT);
2945 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2946 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2947 break;
2948 case MVT::x86mmx:
2949 RRC = &X86::VR64RegClass;
2950 break;
2951 case MVT::f32: case MVT::f64:
2952 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2953 case MVT::v4f32: case MVT::v2f64:
2954 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2955 case MVT::v8f32: case MVT::v4f64:
2956 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2957 case MVT::v16f32: case MVT::v8f64:
2958 RRC = &X86::VR128XRegClass;
2959 break;
2960 }
2961 return std::make_pair(RRC, Cost);
2962}
2963
2964unsigned X86TargetLowering::getAddressSpace() const {
2965 if (Subtarget.is64Bit())
2966 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2967 return 256;
2968}
2969
2970static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2971 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2972 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2973}
2974
2975static Constant* SegmentOffset(IRBuilderBase &IRB,
2976 int Offset, unsigned AddressSpace) {
2977 return ConstantExpr::getIntToPtr(
2978 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2979 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2980}
2981
2982Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
2983 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2984 // tcbhead_t; use it instead of the usual global variable (see
2985 // sysdeps/{i386,x86_64}/nptl/tls.h)
2986 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2987 if (Subtarget.isTargetFuchsia()) {
2988 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2989 return SegmentOffset(IRB, 0x10, getAddressSpace());
2990 } else {
2991 unsigned AddressSpace = getAddressSpace();
2992 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
2993 // Specially, some users may customize the base reg and offset.
2994 int Offset = M->getStackProtectorGuardOffset();
2995 // If we don't set -stack-protector-guard-offset value:
2996 // %fs:0x28, unless we're using a Kernel code model, in which case
2997 // it's %gs:0x28. gs:0x14 on i386.
2998 if (Offset == INT_MAX2147483647)
2999 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
3000
3001 StringRef GuardReg = M->getStackProtectorGuardReg();
3002 if (GuardReg == "fs")
3003 AddressSpace = X86AS::FS;
3004 else if (GuardReg == "gs")
3005 AddressSpace = X86AS::GS;
3006
3007 // Use symbol guard if user specify.
3008 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
3009 if (!GuardSymb.empty()) {
3010 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
3011 if (!GV) {
3012 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
3013 : Type::getInt32Ty(M->getContext());
3014 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
3015 nullptr, GuardSymb, nullptr,
3016 GlobalValue::NotThreadLocal, AddressSpace);
3017 }
3018 return GV;
3019 }
3020
3021 return SegmentOffset(IRB, Offset, AddressSpace);
3022 }
3023 }
3024 return TargetLowering::getIRStackGuard(IRB);
3025}
3026
3027void X86TargetLowering::insertSSPDeclarations(Module &M) const {
3028 // MSVC CRT provides functionalities for stack protection.
3029 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
3030 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
3031 // MSVC CRT has a global variable holding security cookie.
3032 M.getOrInsertGlobal("__security_cookie",
3033 Type::getInt8PtrTy(M.getContext()));
3034
3035 // MSVC CRT has a function to validate security cookie.
3036 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
3037 "__security_check_cookie", Type::getVoidTy(M.getContext()),
3038 Type::getInt8PtrTy(M.getContext()));
3039 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
3040 F->setCallingConv(CallingConv::X86_FastCall);
3041 F->addParamAttr(0, Attribute::AttrKind::InReg);
3042 }
3043 return;
3044 }
3045
3046 StringRef GuardMode = M.getStackProtectorGuard();
3047
3048 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
3049 if ((GuardMode == "tls" || GuardMode.empty()) &&
3050 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
3051 return;
3052 TargetLowering::insertSSPDeclarations(M);
3053}
3054
3055Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
3056 // MSVC CRT has a global variable holding security cookie.
3057 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
3058 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
3059 return M.getGlobalVariable("__security_cookie");
3060 }
3061 return TargetLowering::getSDagStackGuard(M);
3062}
3063
3064Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
3065 // MSVC CRT has a function to validate security cookie.
3066 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
3067 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
3068 return M.getFunction("__security_check_cookie");
3069 }
3070 return TargetLowering::getSSPStackGuardCheck(M);
3071}
3072
3073Value *
3074X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
3075 if (Subtarget.getTargetTriple().isOSContiki())
3076 return getDefaultSafeStackPointerLocation(IRB, false);
3077
3078 // Android provides a fixed TLS slot for the SafeStack pointer. See the
3079 // definition of TLS_SLOT_SAFESTACK in
3080 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
3081 if (Subtarget.isTargetAndroid()) {
3082 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
3083 // %gs:0x24 on i386
3084 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
3085 return SegmentOffset(IRB, Offset, getAddressSpace());
3086 }
3087
3088 // Fuchsia is similar.
3089 if (Subtarget.isTargetFuchsia()) {
3090 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
3091 return SegmentOffset(IRB, 0x18, getAddressSpace());
3092 }
3093
3094 return TargetLowering::getSafeStackPointerLocation(IRB);
3095}
3096
3097//===----------------------------------------------------------------------===//
3098// Return Value Calling Convention Implementation
3099//===----------------------------------------------------------------------===//
3100
3101bool X86TargetLowering::CanLowerReturn(
3102 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3103 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3104 SmallVector<CCValAssign, 16> RVLocs;
3105 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3106 return CCInfo.CheckReturn(Outs, RetCC_X86);
3107}
3108
3109const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
3110 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
3111 return ScratchRegs;
3112}
3113
3114/// Lowers masks values (v*i1) to the local register values
3115/// \returns DAG node after lowering to register type
3116static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
3117 const SDLoc &Dl, SelectionDAG &DAG) {
3118 EVT ValVT = ValArg.getValueType();
3119
3120 if (ValVT == MVT::v1i1)
3121 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
3122 DAG.getIntPtrConstant(0, Dl));
3123
3124 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
3125 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
3126 // Two stage lowering might be required
3127 // bitcast: v8i1 -> i8 / v16i1 -> i16
3128 // anyextend: i8 -> i32 / i16 -> i32
3129 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
3130 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
3131 if (ValLoc == MVT::i32)
3132 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
3133 return ValToCopy;
3134 }
3135
3136 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
3137 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
3138 // One stage lowering is required
3139 // bitcast: v32i1 -> i32 / v64i1 -> i64
3140 return DAG.getBitcast(ValLoc, ValArg);
3141 }
3142
3143 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
3144}
3145
3146/// Breaks v64i1 value into two registers and adds the new node to the DAG
3147static void Passv64i1ArgInRegs(
3148 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
3149 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
3150 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
3151 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")(static_cast <bool> (Subtarget.hasBWI() && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3151, __extension__
__PRETTY_FUNCTION__))
;
3152 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3152, __extension__
__PRETTY_FUNCTION__))
;
3153 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")(static_cast <bool> (Arg.getValueType() == MVT::i64 &&
"Expecting 64 bit value") ? void (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3153, __extension__
__PRETTY_FUNCTION__))
;
3154 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3155, __extension__
__PRETTY_FUNCTION__))
3155 "The value should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3155, __extension__
__PRETTY_FUNCTION__))
;
3156
3157 // Before splitting the value we cast it to i64
3158 Arg = DAG.getBitcast(MVT::i64, Arg);
3159
3160 // Splitting the value into two i32 types
3161 SDValue Lo, Hi;
3162 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
3163 DAG.getConstant(0, Dl, MVT::i32));
3164 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
3165 DAG.getConstant(1, Dl, MVT::i32));
3166
3167 // Attach the two i32 types into corresponding registers
3168 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
3169 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
3170}
3171
3172SDValue
3173X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3174 bool isVarArg,
3175 const SmallVectorImpl<ISD::OutputArg> &Outs,
3176 const SmallVectorImpl<SDValue> &OutVals,
3177 const SDLoc &dl, SelectionDAG &DAG) const {
3178 MachineFunction &MF = DAG.getMachineFunction();
3179 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3180
3181 // In some cases we need to disable registers from the default CSR list.
3182 // For example, when they are used for argument passing.
3183 bool ShouldDisableCalleeSavedRegister =
3184 shouldDisableCalleeSavedRegisterCC(CallConv) ||
3185 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
3186
3187 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
3188 report_fatal_error("X86 interrupts may not return any value");
3189
3190 SmallVector<CCValAssign, 16> RVLocs;
3191 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
3192 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
3193
3194 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
3195 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
3196 ++I, ++OutsIndex) {
3197 CCValAssign &VA = RVLocs[I];
3198 assert(VA.isRegLoc() && "Can only return in registers!")(static_cast <bool> (VA.isRegLoc() && "Can only return in registers!"
) ? void (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3198, __extension__
__PRETTY_FUNCTION__))
;
3199
3200 // Add the register to the CalleeSaveDisableRegs list.
3201 if (ShouldDisableCalleeSavedRegister)
3202 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
3203
3204 SDValue ValToCopy = OutVals[OutsIndex];
3205 EVT ValVT = ValToCopy.getValueType();
3206
3207 // Promote values to the appropriate types.
3208 if (VA.getLocInfo() == CCValAssign::SExt)
3209 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
3210 else if (VA.getLocInfo() == CCValAssign::ZExt)
3211 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
3212 else if (VA.getLocInfo() == CCValAssign::AExt) {
3213 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
3214 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
3215 else
3216 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
3217 }
3218 else if (VA.getLocInfo() == CCValAssign::BCvt)
3219 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
3220
3221 assert(VA.getLocInfo() != CCValAssign::FPExt &&(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3222, __extension__
__PRETTY_FUNCTION__))
3222 "Unexpected FP-extend for return value.")(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3222, __extension__
__PRETTY_FUNCTION__))
;
3223
3224 // Report an error if we have attempted to return a value via an XMM
3225 // register and SSE was disabled.
3226 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3227 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3228 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3229 } else if (!Subtarget.hasSSE2() &&
3230 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3231 ValVT == MVT::f64) {
3232 // When returning a double via an XMM register, report an error if SSE2 is
3233 // not enabled.
3234 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3235 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3236 }
3237
3238 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
3239 // the RET instruction and handled by the FP Stackifier.
3240 if (VA.getLocReg() == X86::FP0 ||
3241 VA.getLocReg() == X86::FP1) {
3242 // If this is a copy from an xmm register to ST(0), use an FPExtend to
3243 // change the value to the FP stack register class.
3244 if (isScalarFPTypeInSSEReg(VA.getValVT()))
3245 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
3246 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3247 // Don't emit a copytoreg.
3248 continue;
3249 }
3250
3251 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
3252 // which is returned in RAX / RDX.
3253 if (Subtarget.is64Bit()) {
3254 if (ValVT == MVT::x86mmx) {
3255 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
3256 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
3257 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
3258 ValToCopy);
3259 // If we don't have SSE2 available, convert to v4f32 so the generated
3260 // register is legal.
3261 if (!Subtarget.hasSSE2())
3262 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
3263 }
3264 }
3265 }
3266
3267 if (VA.needsCustom()) {
3268 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3269, __extension__
__PRETTY_FUNCTION__))
3269 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3269, __extension__
__PRETTY_FUNCTION__))
;
3270
3271 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
3272 Subtarget);
3273
3274 // Add the second register to the CalleeSaveDisableRegs list.
3275 if (ShouldDisableCalleeSavedRegister)
3276 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
3277 } else {
3278 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3279 }
3280 }
3281
3282 SDValue Flag;
3283 SmallVector<SDValue, 6> RetOps;
3284 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3285 // Operand #1 = Bytes To Pop
3286 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
3287 MVT::i32));
3288
3289 // Copy the result values into the output registers.
3290 for (auto &RetVal : RetVals) {
3291 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
3292 RetOps.push_back(RetVal.second);
3293 continue; // Don't emit a copytoreg.
3294 }
3295
3296 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
3297 Flag = Chain.getValue(1);
3298 RetOps.push_back(
3299 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
3300 }
3301
3302 // Swift calling convention does not require we copy the sret argument
3303 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
3304
3305 // All x86 ABIs require that for returning structs by value we copy
3306 // the sret argument into %rax/%eax (depending on ABI) for the return.
3307 // We saved the argument into a virtual register in the entry block,
3308 // so now we copy the value out and into %rax/%eax.
3309 //
3310 // Checking Function.hasStructRetAttr() here is insufficient because the IR
3311 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
3312 // false, then an sret argument may be implicitly inserted in the SelDAG. In
3313 // either case FuncInfo->setSRetReturnReg() will have been called.
3314 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3315 // When we have both sret and another return value, we should use the
3316 // original Chain stored in RetOps[0], instead of the current Chain updated
3317 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
3318
3319 // For the case of sret and another return value, we have
3320 // Chain_0 at the function entry
3321 // Chain_1 = getCopyToReg(Chain_0) in the above loop
3322 // If we use Chain_1 in getCopyFromReg, we will have
3323 // Val = getCopyFromReg(Chain_1)
3324 // Chain_2 = getCopyToReg(Chain_1, Val) from below
3325
3326 // getCopyToReg(Chain_0) will be glued together with
3327 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
3328 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
3329 // Data dependency from Unit B to Unit A due to usage of Val in
3330 // getCopyToReg(Chain_1, Val)
3331 // Chain dependency from Unit A to Unit B
3332
3333 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
3334 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
3335 getPointerTy(MF.getDataLayout()));
3336
3337 Register RetValReg
3338 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
3339 X86::RAX : X86::EAX;
3340 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
3341 Flag = Chain.getValue(1);
3342
3343 // RAX/EAX now acts like a return value.
3344 RetOps.push_back(
3345 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
3346
3347 // Add the returned register to the CalleeSaveDisableRegs list.
3348 if (ShouldDisableCalleeSavedRegister)
3349 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
3350 }
3351
3352 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3353 const MCPhysReg *I =
3354 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3355 if (I) {
3356 for (; *I; ++I) {
3357 if (X86::GR64RegClass.contains(*I))
3358 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3359 else
3360 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3360)
;
3361 }
3362 }
3363
3364 RetOps[0] = Chain; // Update chain.
3365
3366 // Add the flag if we have it.
3367 if (Flag.getNode())
3368 RetOps.push_back(Flag);
3369
3370 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
3371 if (CallConv == CallingConv::X86_INTR)
3372 opcode = X86ISD::IRET;
3373 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
3374}
3375
3376bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3377 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
3378 return false;
3379
3380 SDValue TCChain = Chain;
3381 SDNode *Copy = *N->use_begin();
3382 if (Copy->getOpcode() == ISD::CopyToReg) {
3383 // If the copy has a glue operand, we conservatively assume it isn't safe to
3384 // perform a tail call.
3385 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3386 return false;
3387 TCChain = Copy->getOperand(0);
3388 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
3389 return false;
3390
3391 bool HasRet = false;
3392 for (const SDNode *U : Copy->uses()) {
3393 if (U->getOpcode() != X86ISD::RET_FLAG)
3394 return false;
3395 // If we are returning more than one value, we can definitely
3396 // not make a tail call see PR19530
3397 if (U->getNumOperands() > 4)
3398 return false;
3399 if (U->getNumOperands() == 4 &&
3400 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
3401 return false;
3402 HasRet = true;
3403 }
3404
3405 if (!HasRet)
3406 return false;
3407
3408 Chain = TCChain;
3409 return true;
3410}
3411
3412EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
3413 ISD::NodeType ExtendKind) const {
3414 MVT ReturnMVT = MVT::i32;
3415
3416 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
3417 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
3418 // The ABI does not require i1, i8 or i16 to be extended.
3419 //
3420 // On Darwin, there is code in the wild relying on Clang's old behaviour of
3421 // always extending i8/i16 return values, so keep doing that for now.
3422 // (PR26665).
3423 ReturnMVT = MVT::i8;
3424 }
3425
3426 EVT MinVT = getRegisterType(Context, ReturnMVT);
3427 return VT.bitsLT(MinVT) ? MinVT : VT;
3428}
3429
3430/// Reads two 32 bit registers and creates a 64 bit mask value.
3431/// \param VA The current 32 bit value that need to be assigned.
3432/// \param NextVA The next 32 bit value that need to be assigned.
3433/// \param Root The parent DAG node.
3434/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
3435/// glue purposes. In the case the DAG is already using
3436/// physical register instead of virtual, we should glue
3437/// our new SDValue to InFlag SDvalue.
3438/// \return a new SDvalue of size 64bit.
3439static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
3440 SDValue &Root, SelectionDAG &DAG,
3441 const SDLoc &Dl, const X86Subtarget &Subtarget,
3442 SDValue *InFlag = nullptr) {
3443 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(static_cast <bool> ((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3443, __extension__
__PRETTY_FUNCTION__))
;
3444 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3444, __extension__
__PRETTY_FUNCTION__))
;
3445 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3446, __extension__
__PRETTY_FUNCTION__))
3446 "Expecting first location of 64 bit width type")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3446, __extension__
__PRETTY_FUNCTION__))
;
3447 assert(NextVA.getValVT() == VA.getValVT() &&(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3448, __extension__
__PRETTY_FUNCTION__))
3448 "The locations should have the same type")(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3448, __extension__
__PRETTY_FUNCTION__))
;
3449 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3450, __extension__
__PRETTY_FUNCTION__))
3450 "The values should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3450, __extension__
__PRETTY_FUNCTION__))
;
3451
3452 SDValue Lo, Hi;
3453 SDValue ArgValueLo, ArgValueHi;
3454
3455 MachineFunction &MF = DAG.getMachineFunction();
3456 const TargetRegisterClass *RC = &X86::GR32RegClass;
3457
3458 // Read a 32 bit value from the registers.
3459 if (nullptr == InFlag) {
3460 // When no physical register is present,
3461 // create an intermediate virtual register.
3462 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3463 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3464 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3465 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3466 } else {
3467 // When a physical register is available read the value from it and glue
3468 // the reads together.
3469 ArgValueLo =
3470 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
3471 *InFlag = ArgValueLo.getValue(2);
3472 ArgValueHi =
3473 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
3474 *InFlag = ArgValueHi.getValue(2);
3475 }
3476
3477 // Convert the i32 type into v32i1 type.
3478 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
3479
3480 // Convert the i32 type into v32i1 type.
3481 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
3482
3483 // Concatenate the two values together.
3484 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
3485}
3486
3487/// The function will lower a register of various sizes (8/16/32/64)
3488/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
3489/// \returns a DAG node contains the operand after lowering to mask type.
3490static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
3491 const EVT &ValLoc, const SDLoc &Dl,
3492 SelectionDAG &DAG) {
3493 SDValue ValReturned = ValArg;
3494
3495 if (ValVT == MVT::v1i1)
3496 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3497
3498 if (ValVT == MVT::v64i1) {
3499 // In 32 bit machine, this case is handled by getv64i1Argument
3500 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")(static_cast <bool> (ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? void (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3500, __extension__
__PRETTY_FUNCTION__))
;
3501 // In 64 bit machine, There is no need to truncate the value only bitcast
3502 } else {
3503 MVT maskLen;
3504 switch (ValVT.getSimpleVT().SimpleTy) {
3505 case MVT::v8i1:
3506 maskLen = MVT::i8;
3507 break;
3508 case MVT::v16i1:
3509 maskLen = MVT::i16;
3510 break;
3511 case MVT::v32i1:
3512 maskLen = MVT::i32;
3513 break;
3514 default:
3515 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3515)
;
3516 }
3517
3518 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3519 }
3520 return DAG.getBitcast(ValVT, ValReturned);
3521}
3522
3523/// Lower the result values of a call into the
3524/// appropriate copies out of appropriate physical registers.
3525///
3526SDValue X86TargetLowering::LowerCallResult(
3527 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3528 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3529 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3530 uint32_t *RegMask) const {
3531
3532 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3533 // Assign locations to each value returned by this call.
3534 SmallVector<CCValAssign, 16> RVLocs;
3535 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3536 *DAG.getContext());
3537 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3538
3539 // Copy all of the result registers out of their specified physreg.
3540 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3541 ++I, ++InsIndex) {
3542 CCValAssign &VA = RVLocs[I];
3543 EVT CopyVT = VA.getLocVT();
3544
3545 // In some calling conventions we need to remove the used registers
3546 // from the register mask.
3547 if (RegMask) {
3548 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3549 SubRegs.isValid(); ++SubRegs)
3550 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3551 }
3552
3553 // Report an error if there was an attempt to return FP values via XMM
3554 // registers.
3555 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3556 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3557 if (VA.getLocReg() == X86::XMM1)
3558 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3559 else
3560 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3561 } else if (!Subtarget.hasSSE2() &&
3562 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3563 CopyVT == MVT::f64) {
3564 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3565 if (VA.getLocReg() == X86::XMM1)
3566 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3567 else
3568 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3569 }
3570
3571 // If we prefer to use the value in xmm registers, copy it out as f80 and
3572 // use a truncate to move it from fp stack reg to xmm reg.
3573 bool RoundAfterCopy = false;
3574 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3575 isScalarFPTypeInSSEReg(VA.getValVT())) {
3576 if (!Subtarget.hasX87())
3577 report_fatal_error("X87 register return with X87 disabled");
3578 CopyVT = MVT::f80;
3579 RoundAfterCopy = (CopyVT != VA.getLocVT());
3580 }
3581
3582 SDValue Val;
3583 if (VA.needsCustom()) {
3584 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3585, __extension__
__PRETTY_FUNCTION__))
3585 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3585, __extension__
__PRETTY_FUNCTION__))
;
3586 Val =
3587 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3588 } else {
3589 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3590 .getValue(1);
3591 Val = Chain.getValue(0);
3592 InFlag = Chain.getValue(2);
3593 }
3594
3595 if (RoundAfterCopy)
3596 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3597 // This truncation won't change the value.
3598 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
3599
3600 if (VA.isExtInLoc()) {
3601 if (VA.getValVT().isVector() &&
3602 VA.getValVT().getScalarType() == MVT::i1 &&
3603 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3604 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3605 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3606 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3607 } else
3608 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3609 }
3610
3611 if (VA.getLocInfo() == CCValAssign::BCvt)
3612 Val = DAG.getBitcast(VA.getValVT(), Val);
3613
3614 InVals.push_back(Val);
3615 }
3616
3617 return Chain;
3618}
3619
3620//===----------------------------------------------------------------------===//
3621// C & StdCall & Fast Calling Convention implementation
3622//===----------------------------------------------------------------------===//
3623// StdCall calling convention seems to be standard for many Windows' API
3624// routines and around. It differs from C calling convention just a little:
3625// callee should clean up the stack, not caller. Symbols should be also
3626// decorated in some fancy way :) It doesn't support any vector arguments.
3627// For info on fast calling convention see Fast Calling Convention (tail call)
3628// implementation LowerX86_32FastCCCallTo.
3629
3630/// Determines whether Args, either a set of outgoing arguments to a call, or a
3631/// set of incoming args of a call, contains an sret pointer that the callee
3632/// pops
3633template <typename T>
3634static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
3635 const X86Subtarget &Subtarget) {
3636 // Not C++20 (yet), so no concepts available.
3637 static_assert(std::is_same_v<T, ISD::OutputArg> ||
3638 std::is_same_v<T, ISD::InputArg>,
3639 "requires ISD::OutputArg or ISD::InputArg");
3640
3641 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
3642 // for most compilations.
3643 if (!Subtarget.is32Bit())
3644 return false;
3645
3646 if (Args.empty())
3647 return false;
3648
3649 // Most calls do not have an sret argument, check the arg next.
3650 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
3651 if (!Flags.isSRet() || Flags.isInReg())
3652 return false;
3653
3654 // The MSVCabi does not pop the sret.
3655 if (Subtarget.getTargetTriple().isOSMSVCRT())
3656 return false;
3657
3658 // MCUs don't pop the sret
3659 if (Subtarget.isTargetMCU())
3660 return false;
3661
3662 // Callee pops argument
3663 return true;
3664}
3665
3666/// Make a copy of an aggregate at address specified by "Src" to address
3667/// "Dst" with size and alignment information specified by the specific
3668/// parameter attribute. The copy will be passed as a byval function parameter.
3669static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3670 SDValue Chain, ISD::ArgFlagsTy Flags,
3671 SelectionDAG &DAG, const SDLoc &dl) {
3672 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3673
3674 return DAG.getMemcpy(
3675 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3676 /*isVolatile*/ false, /*AlwaysInline=*/true,
3677 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3678}
3679
3680/// Return true if the calling convention is one that we can guarantee TCO for.
3681static bool canGuaranteeTCO(CallingConv::ID CC) {
3682 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3683 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3684 CC == CallingConv::HHVM || CC == CallingConv::Tail ||
3685 CC == CallingConv::SwiftTail);
3686}
3687
3688/// Return true if we might ever do TCO for calls with this calling convention.
3689static bool mayTailCallThisCC(CallingConv::ID CC) {
3690 switch (CC) {
3691 // C calling conventions:
3692 case CallingConv::C:
3693 case CallingConv::Win64:
3694 case CallingConv::X86_64_SysV:
3695 // Callee pop conventions:
3696 case CallingConv::X86_ThisCall:
3697 case CallingConv::X86_StdCall:
3698 case CallingConv::X86_VectorCall:
3699 case CallingConv::X86_FastCall:
3700 // Swift:
3701 case CallingConv::Swift:
3702 return true;
3703 default:
3704 return canGuaranteeTCO(CC);
3705 }
3706}
3707
3708/// Return true if the function is being made into a tailcall target by
3709/// changing its ABI.
3710static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3711 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
3712 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
3713}
3714
3715bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3716 if (!CI->isTailCall())
3717 return false;
3718
3719 CallingConv::ID CalleeCC = CI->getCallingConv();
3720 if (!mayTailCallThisCC(CalleeCC))
3721 return false;
3722
3723 return true;
3724}
3725
3726SDValue
3727X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3728 const SmallVectorImpl<ISD::InputArg> &Ins,
3729 const SDLoc &dl, SelectionDAG &DAG,
3730 const CCValAssign &VA,
3731 MachineFrameInfo &MFI, unsigned i) const {
3732 // Create the nodes corresponding to a load from this parameter slot.
3733 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3734 bool AlwaysUseMutable = shouldGuaranteeTCO(
3735 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3736 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3737 EVT ValVT;
3738 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3739
3740 // If value is passed by pointer we have address passed instead of the value
3741 // itself. No need to extend if the mask value and location share the same
3742 // absolute size.
3743 bool ExtendedInMem =
3744 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3745 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3746
3747 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3748 ValVT = VA.getLocVT();
3749 else
3750 ValVT = VA.getValVT();
3751
3752 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3753 // changed with more analysis.
3754 // In case of tail call optimization mark all arguments mutable. Since they
3755 // could be overwritten by lowering of arguments in case of a tail call.
3756 if (Flags.isByVal()) {
3757 unsigned Bytes = Flags.getByValSize();
3758 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3759
3760 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3761 // can be improved with deeper analysis.
3762 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3763 /*isAliased=*/true);
3764 return DAG.getFrameIndex(FI, PtrVT);
3765 }
3766
3767 EVT ArgVT = Ins[i].ArgVT;
3768
3769 // If this is a vector that has been split into multiple parts, and the
3770 // scalar size of the parts don't match the vector element size, then we can't
3771 // elide the copy. The parts will have padding between them instead of being
3772 // packed like a vector.
3773 bool ScalarizedAndExtendedVector =
3774 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3775 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3776
3777 // This is an argument in memory. We might be able to perform copy elision.
3778 // If the argument is passed directly in memory without any extension, then we
3779 // can perform copy elision. Large vector types, for example, may be passed
3780 // indirectly by pointer.
3781 if (Flags.isCopyElisionCandidate() &&
3782 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3783 !ScalarizedAndExtendedVector) {
3784 SDValue PartAddr;
3785 if (Ins[i].PartOffset == 0) {
3786 // If this is a one-part value or the first part of a multi-part value,
3787 // create a stack object for the entire argument value type and return a
3788 // load from our portion of it. This assumes that if the first part of an
3789 // argument is in memory, the rest will also be in memory.
3790 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3791 /*IsImmutable=*/false);
3792 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3793 return DAG.getLoad(
3794 ValVT, dl, Chain, PartAddr,
3795 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3796 } else {
3797 // This is not the first piece of an argument in memory. See if there is
3798 // already a fixed stack object including this offset. If so, assume it
3799 // was created by the PartOffset == 0 branch above and create a load from
3800 // the appropriate offset into it.
3801 int64_t PartBegin = VA.getLocMemOffset();
3802 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3803 int FI = MFI.getObjectIndexBegin();
3804 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3805 int64_t ObjBegin = MFI.getObjectOffset(FI);
3806 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3807 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3808 break;
3809 }
3810 if (MFI.isFixedObjectIndex(FI)) {
3811 SDValue Addr =
3812 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3813 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3814 return DAG.getLoad(
3815 ValVT, dl, Chain, Addr,
3816 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3817 Ins[i].PartOffset));
3818 }
3819 }
3820 }
3821
3822 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3823 VA.getLocMemOffset(), isImmutable);
3824
3825 // Set SExt or ZExt flag.
3826 if (VA.getLocInfo() == CCValAssign::ZExt) {
3827 MFI.setObjectZExt(FI, true);
3828 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3829 MFI.setObjectSExt(FI, true);
3830 }
3831
3832 MaybeAlign Alignment;
3833 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
3834 ValVT != MVT::f80)
3835 Alignment = MaybeAlign(4);
3836 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3837 SDValue Val = DAG.getLoad(
3838 ValVT, dl, Chain, FIN,
3839 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3840 Alignment);
3841 return ExtendedInMem
3842 ? (VA.getValVT().isVector()
3843 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3844 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3845 : Val;
3846}
3847
3848// FIXME: Get this from tablegen.
3849static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3850 const X86Subtarget &Subtarget) {
3851 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3851, __extension__ __PRETTY_FUNCTION__))
;
3852
3853 if (Subtarget.isCallingConvWin64(CallConv)) {
3854 static const MCPhysReg GPR64ArgRegsWin64[] = {
3855 X86::RCX, X86::RDX, X86::R8, X86::R9
3856 };
3857 return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3858 }
3859
3860 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3861 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3862 };
3863 return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3864}
3865
3866// FIXME: Get this from tablegen.
3867static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3868 CallingConv::ID CallConv,
3869 const X86Subtarget &Subtarget) {
3870 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3870, __extension__ __PRETTY_FUNCTION__))
;
3871 if (Subtarget.isCallingConvWin64(CallConv)) {
3872 // The XMM registers which might contain var arg parameters are shadowed
3873 // in their paired GPR. So we only need to save the GPR to their home
3874 // slots.
3875 // TODO: __vectorcall will change this.
3876 return std::nullopt;
3877 }
3878
3879 bool isSoftFloat = Subtarget.useSoftFloat();
3880 if (isSoftFloat || !Subtarget.hasSSE1())
3881 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3882 // registers.
3883 return std::nullopt;
3884
3885 static const MCPhysReg XMMArgRegs64Bit[] = {
3886 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3887 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3888 };
3889 return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3890}
3891
3892#ifndef NDEBUG
3893static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3894 return llvm::is_sorted(
3895 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3896 return A.getValNo() < B.getValNo();
3897 });
3898}
3899#endif
3900
3901namespace {
3902/// This is a helper class for lowering variable arguments parameters.
3903class VarArgsLoweringHelper {
3904public:
3905 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3906 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3907 CallingConv::ID CallConv, CCState &CCInfo)
3908 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3909 TheMachineFunction(DAG.getMachineFunction()),
3910 TheFunction(TheMachineFunction.getFunction()),
3911 FrameInfo(TheMachineFunction.getFrameInfo()),
3912 FrameLowering(*Subtarget.getFrameLowering()),
3913 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3914 CCInfo(CCInfo) {}
3915
3916 // Lower variable arguments parameters.
3917 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3918
3919private:
3920 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3921
3922 void forwardMustTailParameters(SDValue &Chain);
3923
3924 bool is64Bit() const { return Subtarget.is64Bit(); }
3925 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3926
3927 X86MachineFunctionInfo *FuncInfo;
3928 const SDLoc &DL;
3929 SelectionDAG &DAG;
3930 const X86Subtarget &Subtarget;
3931 MachineFunction &TheMachineFunction;
3932 const Function &TheFunction;
3933 MachineFrameInfo &FrameInfo;
3934 const TargetFrameLowering &FrameLowering;
3935 const TargetLowering &TargLowering;
3936 CallingConv::ID CallConv;
3937 CCState &CCInfo;
3938};
3939} // namespace
3940
3941void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3942 SDValue &Chain, unsigned StackSize) {
3943 // If the function takes variable number of arguments, make a frame index for
3944 // the start of the first vararg value... for expansion of llvm.va_start. We
3945 // can skip this if there are no va_start calls.
3946 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3947 CallConv != CallingConv::X86_ThisCall)) {
3948 FuncInfo->setVarArgsFrameIndex(
3949 FrameInfo.CreateFixedObject(1, StackSize, true));
3950 }
3951
3952 // 64-bit calling conventions support varargs and register parameters, so we
3953 // have to do extra work to spill them in the prologue.
3954 if (is64Bit()) {
3955 // Find the first unallocated argument registers.
3956 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3957 ArrayRef<MCPhysReg> ArgXMMs =
3958 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3959 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3960 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3961
3962 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3963, __extension__
__PRETTY_FUNCTION__))
3963 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3963, __extension__
__PRETTY_FUNCTION__))
;
3964
3965 if (isWin64()) {
3966 // Get to the caller-allocated home save location. Add 8 to account
3967 // for the return address.
3968 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3969 FuncInfo->setRegSaveFrameIndex(
3970 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3971 // Fixup to set vararg frame on shadow area (4 x i64).
3972 if (NumIntRegs < 4)
3973 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3974 } else {
3975 // For X86-64, if there are vararg parameters that are passed via
3976 // registers, then we must store them to their spots on the stack so
3977 // they may be loaded by dereferencing the result of va_next.
3978 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3979 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3980 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3981 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3982 }
3983
3984 SmallVector<SDValue, 6>
3985 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3986 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3987 // keeping live input value
3988 SDValue ALVal; // if applicable keeps SDValue for %al register
3989
3990 // Gather all the live in physical registers.
3991 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3992 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3993 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3994 }
3995 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3996 if (!AvailableXmms.empty()) {
3997 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3998 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3999 for (MCPhysReg Reg : AvailableXmms) {
4000 // FastRegisterAllocator spills virtual registers at basic
4001 // block boundary. That leads to usages of xmm registers
4002 // outside of check for %al. Pass physical registers to
4003 // VASTART_SAVE_XMM_REGS to avoid unneccessary spilling.
4004 TheMachineFunction.getRegInfo().addLiveIn(Reg);
4005 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
4006 }
4007 }
4008
4009 // Store the integer parameter registers.
4010 SmallVector<SDValue, 8> MemOps;
4011 SDValue RSFIN =
4012 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
4013 TargLowering.getPointerTy(DAG.getDataLayout()));
4014 unsigned Offset = FuncInfo->getVarArgsGPOffset();
4015 for (SDValue Val : LiveGPRs) {
4016 SDValue FIN = DAG.getNode(ISD::ADD, DL,
4017 TargLowering.getPointerTy(DAG.getDataLayout()),
4018 RSFIN, DAG.getIntPtrConstant(Offset, DL));
4019 SDValue Store =
4020 DAG.getStore(Val.getValue(1), DL, Val, FIN,
4021 MachinePointerInfo::getFixedStack(
4022 DAG.getMachineFunction(),
4023 FuncInfo->getRegSaveFrameIndex(), Offset));
4024 MemOps.push_back(Store);
4025 Offset += 8;
4026 }
4027
4028 // Now store the XMM (fp + vector) parameter registers.
4029 if (!LiveXMMRegs.empty()) {
4030 SmallVector<SDValue, 12> SaveXMMOps;
4031 SaveXMMOps.push_back(Chain);
4032 SaveXMMOps.push_back(ALVal);
4033 SaveXMMOps.push_back(RSFIN);
4034 SaveXMMOps.push_back(
4035 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
4036 llvm::append_range(SaveXMMOps, LiveXMMRegs);
4037 MachineMemOperand *StoreMMO =
4038 DAG.getMachineFunction().getMachineMemOperand(
4039 MachinePointerInfo::getFixedStack(
4040 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
4041 Offset),
4042 MachineMemOperand::MOStore, 128, Align(16));
4043 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
4044 DL, DAG.getVTList(MVT::Other),
4045 SaveXMMOps, MVT::i8, StoreMMO));
4046 }
4047
4048 if (!MemOps.empty())
4049 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4050 }
4051}
4052
4053void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
4054 // Find the largest legal vector type.
4055 MVT VecVT = MVT::Other;
4056 // FIXME: Only some x86_32 calling conventions support AVX512.
4057 if (Subtarget.useAVX512Regs() &&
4058 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
4059 CallConv == CallingConv::Intel_OCL_BI)))
4060 VecVT = MVT::v16f32;
4061 else if (Subtarget.hasAVX())
4062 VecVT = MVT::v8f32;
4063 else if (Subtarget.hasSSE2())
4064 VecVT = MVT::v4f32;
4065
4066 // We forward some GPRs and some vector types.
4067 SmallVector<MVT, 2> RegParmTypes;
4068 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
4069 RegParmTypes.push_back(IntVT);
4070 if (VecVT != MVT::Other)
4071 RegParmTypes.push_back(VecVT);
4072
4073 // Compute the set of forwarded registers. The rest are scratch.
4074 SmallVectorImpl<ForwardedRegister> &Forwards =
4075 FuncInfo->getForwardedMustTailRegParms();
4076 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
4077
4078 // Forward AL for SysV x86_64 targets, since it is used for varargs.
4079 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
4080 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
4081 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
4082 }
4083
4084 // Copy all forwards from physical to virtual registers.
4085 for (ForwardedRegister &FR : Forwards) {
4086 // FIXME: Can we use a less constrained schedule?
4087 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
4088 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
4089 TargLowering.getRegClassFor(FR.VT));
4090 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
4091 }
4092}
4093
4094void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
4095 unsigned StackSize) {
4096 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
4097 // If necessary, it would be set into the correct value later.
4098 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
4099 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4100
4101 if (FrameInfo.hasVAStart())
4102 createVarArgAreaAndStoreRegisters(Chain, StackSize);
4103
4104 if (FrameInfo.hasMustTailInVarArgFunc())
4105 forwardMustTailParameters(Chain);
4106}
4107
4108SDValue X86TargetLowering::LowerFormalArguments(
4109 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4110 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4111 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4112 MachineFunction &MF = DAG.getMachineFunction();
4113 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4114
4115 const Function &F = MF.getFunction();
4116 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
4117 F.getName() == "main")
4118 FuncInfo->setForceFramePointer(true);
4119
4120 MachineFrameInfo &MFI = MF.getFrameInfo();
4121 bool Is64Bit = Subtarget.is64Bit();
4122 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4123
4124 assert((static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4126, __extension__
__PRETTY_FUNCTION__))
4125 !(IsVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4126, __extension__
__PRETTY_FUNCTION__))
4126 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")(static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4126, __extension__
__PRETTY_FUNCTION__))
;
4127
4128 // Assign locations to all of the incoming arguments.
4129 SmallVector<CCValAssign, 16> ArgLocs;
4130 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4131
4132 // Allocate shadow area for Win64.
4133 if (IsWin64)
4134 CCInfo.AllocateStack(32, Align(8));
4135
4136 CCInfo.AnalyzeArguments(Ins, CC_X86);
4137
4138 // In vectorcall calling convention a second pass is required for the HVA
4139 // types.
4140 if (CallingConv::X86_VectorCall == CallConv) {
4141 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
4142 }
4143
4144 // The next loop assumes that the locations are in the same order of the
4145 // input arguments.
4146 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4147, __extension__
__PRETTY_FUNCTION__))
4147 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4147, __extension__
__PRETTY_FUNCTION__))
;
4148
4149 SDValue ArgValue;
4150 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
4151 ++I, ++InsIndex) {
4152 assert(InsIndex < Ins.size() && "Invalid Ins index")(static_cast <bool> (InsIndex < Ins.size() &&
"Invalid Ins index") ? void (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4152, __extension__
__PRETTY_FUNCTION__))
;
4153 CCValAssign &VA = ArgLocs[I];
4154
4155 if (VA.isRegLoc()) {
4156 EVT RegVT = VA.getLocVT();
4157 if (VA.needsCustom()) {
4158 assert((static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4160, __extension__
__PRETTY_FUNCTION__))
4159 VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4160, __extension__
__PRETTY_FUNCTION__))
4160 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4160, __extension__
__PRETTY_FUNCTION__))
;
4161
4162 // v64i1 values, in regcall calling convention, that are
4163 // compiled to 32 bit arch, are split up into two registers.
4164 ArgValue =
4165 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
4166 } else {
4167 const TargetRegisterClass *RC;
4168 if (RegVT == MVT::i8)
4169 RC = &X86::GR8RegClass;
4170 else if (RegVT == MVT::i16)
4171 RC = &X86::GR16RegClass;
4172 else if (RegVT == MVT::i32)
4173 RC = &X86::GR32RegClass;
4174 else if (Is64Bit && RegVT == MVT::i64)
4175 RC = &X86::GR64RegClass;
4176 else if (RegVT == MVT::f16)
4177 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
4178 else if (RegVT == MVT::f32)
4179 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
4180 else if (RegVT == MVT::f64)
4181 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
4182 else if (RegVT == MVT::f80)
4183 RC = &X86::RFP80RegClass;
4184 else if (RegVT == MVT::f128)
4185 RC = &X86::VR128RegClass;
4186 else if (RegVT.is512BitVector())
4187 RC = &X86::VR512RegClass;
4188 else if (RegVT.is256BitVector())
4189 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
4190 else if (RegVT.is128BitVector())
4191 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
4192 else if (RegVT == MVT::x86mmx)
4193 RC = &X86::VR64RegClass;
4194 else if (RegVT == MVT::v1i1)
4195 RC = &X86::VK1RegClass;
4196 else if (RegVT == MVT::v8i1)
4197 RC = &X86::VK8RegClass;
4198 else if (RegVT == MVT::v16i1)
4199 RC = &X86::VK16RegClass;
4200 else if (RegVT == MVT::v32i1)
4201 RC = &X86::VK32RegClass;
4202 else if (RegVT == MVT::v64i1)
4203 RC = &X86::VK64RegClass;
4204 else
4205 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4205)
;
4206
4207 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4208 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4209 }
4210
4211 // If this is an 8 or 16-bit value, it is really passed promoted to 32
4212 // bits. Insert an assert[sz]ext to capture this, then truncate to the
4213 // right size.
4214 if (VA.getLocInfo() == CCValAssign::SExt)
4215 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4216 DAG.getValueType(VA.getValVT()));
4217 else if (VA.getLocInfo() == CCValAssign::ZExt)
4218 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4219 DAG.getValueType(VA.getValVT()));
4220 else if (VA.getLocInfo() == CCValAssign::BCvt)
4221 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
4222
4223 if (VA.isExtInLoc()) {
4224 // Handle MMX values passed in XMM regs.
4225 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
4226 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
4227 else if (VA.getValVT().isVector() &&
4228 VA.getValVT().getScalarType() == MVT::i1 &&
4229 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
4230 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
4231 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
4232 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
4233 } else
4234 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4235 }
4236 } else {
4237 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4237, __extension__ __PRETTY_FUNCTION__))
;
4238 ArgValue =
4239 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
4240 }
4241
4242 // If value is passed via pointer - do a load.
4243 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
4244 ArgValue =
4245 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
4246
4247 InVals.push_back(ArgValue);
4248 }
4249
4250 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
4251 if (Ins[I].Flags.isSwiftAsync()) {
4252 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
4253 if (Subtarget.is64Bit())
4254 X86FI->setHasSwiftAsyncContext(true);
4255 else {
4256 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
4257 X86FI->setSwiftAsyncContextFrameIdx(FI);
4258 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
4259 DAG.getFrameIndex(FI, MVT::i32),
4260 MachinePointerInfo::getFixedStack(MF, FI));
4261 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
4262 }
4263 }
4264
4265 // Swift calling convention does not require we copy the sret argument
4266 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
4267 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
4268 continue;
4269
4270 // All x86 ABIs require that for returning structs by value we copy the
4271 // sret argument into %rax/%eax (depending on ABI) for the return. Save
4272 // the argument into a virtual register so that we can access it from the
4273 // return points.
4274 if (Ins[I].Flags.isSRet()) {
4275 assert(!FuncInfo->getSRetReturnReg() &&(static_cast <bool> (!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set") ? void (0) : __assert_fail
("!FuncInfo->getSRetReturnReg() && \"SRet return has already been set\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4276, __extension__
__PRETTY_FUNCTION__))
4276 "SRet return has already been set")(static_cast <bool> (!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set") ? void (0) : __assert_fail
("!FuncInfo->getSRetReturnReg() && \"SRet return has already been set\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4276, __extension__
__PRETTY_FUNCTION__))
;
4277 MVT PtrTy = getPointerTy(DAG.getDataLayout());
4278 Register Reg =
4279 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
4280 FuncInfo->setSRetReturnReg(Reg);
4281 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
4282 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
4283 break;
4284 }
4285 }
4286
4287 unsigned StackSize = CCInfo.getNextStackOffset();
4288 // Align stack specially for tail calls.
4289 if (shouldGuaranteeTCO(CallConv,
4290 MF.getTarget().Options.GuaranteedTailCallOpt))
4291 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
4292
4293 if (IsVarArg)
4294 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
4295 .lowerVarArgsParameters(Chain, StackSize);
4296
4297 // Some CCs need callee pop.
4298 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
4299 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4300 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
4301 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
4302 // X86 interrupts must pop the error code (and the alignment padding) if
4303 // present.
4304 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
4305 } else {
4306 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
4307 // If this is an sret function, the return should pop the hidden pointer.
4308 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
4309 FuncInfo->setBytesToPopOnReturn(4);
4310 }
4311
4312 if (!Is64Bit) {
4313 // RegSaveFrameIndex is X86-64 only.
4314 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4315 }
4316
4317 FuncInfo->setArgumentStackSize(StackSize);
4318
4319 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
4320 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
4321 if (Personality == EHPersonality::CoreCLR) {
4322 assert(Is64Bit)(static_cast <bool> (Is64Bit) ? void (0) : __assert_fail
("Is64Bit", "llvm/lib/Target/X86/X86ISelLowering.cpp", 4322,
__extension__ __PRETTY_FUNCTION__))
;
4323 // TODO: Add a mechanism to frame lowering that will allow us to indicate
4324 // that we'd prefer this slot be allocated towards the bottom of the frame
4325 // (i.e. near the stack pointer after allocating the frame). Every
4326 // funclet needs a copy of this slot in its (mostly empty) frame, and the
4327 // offset from the bottom of this and each funclet's frame must be the
4328 // same, so the size of funclets' (mostly empty) frames is dictated by
4329 // how far this slot is from the bottom (since they allocate just enough
4330 // space to accommodate holding this slot at the correct offset).
4331 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
4332 EHInfo->PSPSymFrameIdx = PSPSymFI;
4333 }
4334 }
4335
4336 if (shouldDisableCalleeSavedRegisterCC(CallConv) ||
4337 F.hasFnAttribute("no_caller_saved_registers")) {
4338 MachineRegisterInfo &MRI = MF.getRegInfo();
4339 for (std::pair<Register, Register> Pair : MRI.liveins())
4340 MRI.disableCalleeSavedRegister(Pair.first);
4341 }
4342
4343 return Chain;
4344}
4345
4346SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
4347 SDValue Arg, const SDLoc &dl,
4348 SelectionDAG &DAG,
4349 const CCValAssign &VA,
4350 ISD::ArgFlagsTy Flags,
4351 bool isByVal) const {
4352 unsigned LocMemOffset = VA.getLocMemOffset();
4353 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4354 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4355 StackPtr, PtrOff);
4356 if (isByVal)
4357 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
4358
4359 MaybeAlign Alignment;
4360 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
4361 Arg.getSimpleValueType() != MVT::f80)
4362 Alignment = MaybeAlign(4);
4363 return DAG.getStore(
4364 Chain, dl, Arg, PtrOff,
4365 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
4366 Alignment);
4367}
4368
4369/// Emit a load of return address if tail call
4370/// optimization is performed and it is required.
4371SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
4372 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
4373 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
4374 // Adjust the Return address stack slot.
4375 EVT VT = getPointerTy(DAG.getDataLayout());
4376 OutRetAddr = getReturnAddressFrameIndex(DAG);
4377
4378 // Load the "old" Return address.
4379 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
4380 return SDValue(OutRetAddr.getNode(), 1);
4381}
4382
4383/// Emit a store of the return address if tail call
4384/// optimization is performed and it is required (FPDiff!=0).
4385static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
4386 SDValue Chain, SDValue RetAddrFrIdx,
4387 EVT PtrVT, unsigned SlotSize,
4388 int FPDiff, const SDLoc &dl) {
4389 // Store the return address to the appropriate stack slot.
4390 if (!FPDiff) return Chain;
4391 // Calculate the new stack slot for the return address.
4392 int NewReturnAddrFI =
4393 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
4394 false);
4395 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
4396 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
4397 MachinePointerInfo::getFixedStack(
4398 DAG.getMachineFunction(), NewReturnAddrFI));
4399 return Chain;
4400}
4401
4402/// Returns a vector_shuffle mask for an movs{s|d}, movd
4403/// operation of specified width.
4404static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
4405 SDValue V2) {
4406 unsigned NumElems = VT.getVectorNumElements();
4407 SmallVector<int, 8> Mask;
4408 Mask.push_back(NumElems);
4409 for (unsigned i = 1; i != NumElems; ++i)
4410 Mask.push_back(i);
4411 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
4412}
4413
4414SDValue
4415X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4416 SmallVectorImpl<SDValue> &InVals) const {
4417 SelectionDAG &DAG = CLI.DAG;
4418 SDLoc &dl = CLI.DL;
4419 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4420 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4421 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4422 SDValue Chain = CLI.Chain;
4423 SDValue Callee = CLI.Callee;
4424 CallingConv::ID CallConv = CLI.CallConv;
4425 bool &isTailCall = CLI.IsTailCall;
4426 bool isVarArg = CLI.IsVarArg;
4427 const auto *CB = CLI.CB;
4428
4429 MachineFunction &MF = DAG.getMachineFunction();
4430 bool Is64Bit = Subtarget.is64Bit();
4431 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4432 bool IsSibcall = false;
4433 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
4434 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
4435 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
4436 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
4437 bool HasNCSR = (CB && isa<CallInst>(CB) &&
4438 CB->hasFnAttr("no_caller_saved_registers"));
4439 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
4440 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
4441 bool IsCFICall = IsIndirectCall && CLI.CFIType;
4442 const Module *M = MF.getMMI().getModule();
4443 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
4444
4445 MachineFunction::CallSiteInfo CSInfo;
4446 if (CallConv == CallingConv::X86_INTR)
4447 report_fatal_error("X86 interrupts may not be called directly");
4448
4449 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
4450 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
4451 // If we are using a GOT, disable tail calls to external symbols with
4452 // default visibility. Tail calling such a symbol requires using a GOT
4453 // relocation, which forces early binding of the symbol. This breaks code
4454 // that require lazy function symbol resolution. Using musttail or
4455 // GuaranteedTailCallOpt will override this.
4456 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4457 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
4458 G->getGlobal()->hasDefaultVisibility()))
4459 isTailCall = false;
4460 }
4461
4462 if (isTailCall && !IsMustTail) {
4463 // Check if it's really possible to do a tail call.
4464 isTailCall = IsEligibleForTailCallOptimization(
4465 Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
4466 Ins, DAG);
4467
4468 // Sibcalls are automatically detected tailcalls which do not require
4469 // ABI changes.
4470 if (!IsGuaranteeTCO && isTailCall)
4471 IsSibcall = true;
4472
4473 if (isTailCall)
4474 ++NumTailCalls;
4475 }
4476
4477 if (IsMustTail && !isTailCall)
4478 report_fatal_error("failed to perform tail call elimination on a call "
4479 "site marked musttail");
4480
4481 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4482, __extension__
__PRETTY_FUNCTION__))
4482 "Var args not supported with calling convention fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4482, __extension__
__PRETTY_FUNCTION__))
;
4483
4484 // Analyze operands of the call, assigning locations to each operand.
4485 SmallVector<CCValAssign, 16> ArgLocs;
4486 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4487
4488 // Allocate shadow area for Win64.
4489 if (IsWin64)
4490 CCInfo.AllocateStack(32, Align(8));
4491
4492 CCInfo.AnalyzeArguments(Outs, CC_X86);
4493
4494 // In vectorcall calling convention a second pass is required for the HVA
4495 // types.
4496 if (CallingConv::X86_VectorCall == CallConv) {
4497 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
4498 }
4499
4500 // Get a count of how many bytes are to be pushed on the stack.
4501 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
4502 if (IsSibcall)
4503 // This is a sibcall. The memory operands are available in caller's
4504 // own caller's stack.
4505 NumBytes = 0;
4506 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
4507 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
4508
4509 int FPDiff = 0;
4510 if (isTailCall &&
4511 shouldGuaranteeTCO(CallConv,
4512 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4513 // Lower arguments at fp - stackoffset + fpdiff.
4514 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
4515
4516 FPDiff = NumBytesCallerPushed - NumBytes;
4517
4518 // Set the delta of movement of the returnaddr stackslot.
4519 // But only set if delta is greater than previous delta.
4520 if (FPDiff < X86Info->getTCReturnAddrDelta())
4521 X86Info->setTCReturnAddrDelta(FPDiff);
4522 }
4523
4524 unsigned NumBytesToPush = NumBytes;
4525 unsigned NumBytesToPop = NumBytes;
4526
4527 // If we have an inalloca argument, all stack space has already been allocated
4528 // for us and be right at the top of the stack. We don't support multiple
4529 // arguments passed in memory when using inalloca.
4530 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4531 NumBytesToPush = 0;
4532 if (!ArgLocs.back().isMemLoc())
4533 report_fatal_error("cannot use inalloca attribute on a register "
4534 "parameter");
4535 if (ArgLocs.back().getLocMemOffset() != 0)
4536 report_fatal_error("any parameter with the inalloca attribute must be "
4537 "the only memory argument");
4538 } else if (CLI.IsPreallocated) {
4539 assert(ArgLocs.back().isMemLoc() &&(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4541, __extension__
__PRETTY_FUNCTION__))
4540 "cannot use preallocated attribute on a register "(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4541, __extension__
__PRETTY_FUNCTION__))
4541 "parameter")(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4541, __extension__
__PRETTY_FUNCTION__))
;
4542 SmallVector<size_t, 4> PreallocatedOffsets;
4543 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4544 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4545 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4546 }
4547 }
4548 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4549 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4550 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4551 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4552 NumBytesToPush = 0;
4553 }
4554
4555 if (!IsSibcall && !IsMustTail)
4556 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4557 NumBytes - NumBytesToPush, dl);
4558
4559 SDValue RetAddrFrIdx;
4560 // Load return address for tail calls.
4561 if (isTailCall && FPDiff)
4562 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4563 Is64Bit, FPDiff, dl);
4564
4565 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4566 SmallVector<SDValue, 8> MemOpChains;
4567 SDValue StackPtr;
4568
4569 // The next loop assumes that the locations are in the same order of the
4570 // input arguments.
4571 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4572, __extension__
__PRETTY_FUNCTION__))
4572 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4572, __extension__
__PRETTY_FUNCTION__))
;
4573
4574 // Walk the register/memloc assignments, inserting copies/loads. In the case
4575 // of tail call optimization arguments are handle later.
4576 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4577 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4578 ++I, ++OutIndex) {
4579 assert(OutIndex < Outs.size() && "Invalid Out index")(static_cast <bool> (OutIndex < Outs.size() &&
"Invalid Out index") ? void (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4579, __extension__
__PRETTY_FUNCTION__))
;
4580 // Skip inalloca/preallocated arguments, they have already been written.
4581 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4582 if (Flags.isInAlloca() || Flags.isPreallocated())
4583 continue;
4584
4585 CCValAssign &VA = ArgLocs[I];
4586 EVT RegVT = VA.getLocVT();
4587 SDValue Arg = OutVals[OutIndex];
4588 bool isByVal = Flags.isByVal();
4589
4590 // Promote the value if needed.
4591 switch (VA.getLocInfo()) {
4592 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4592)
;
4593 case CCValAssign::Full: break;
4594 case CCValAssign::SExt:
4595 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4596 break;
4597 case CCValAssign::ZExt:
4598 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4599 break;
4600 case CCValAssign::AExt:
4601 if (Arg.getValueType().isVector() &&
4602 Arg.getValueType().getVectorElementType() == MVT::i1)
4603 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4604 else if (RegVT.is128BitVector()) {
4605 // Special case: passing MMX values in XMM registers.
4606 Arg = DAG.getBitcast(MVT::i64, Arg);
4607 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4608 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4609 } else
4610 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4611 break;
4612 case CCValAssign::BCvt:
4613 Arg = DAG.getBitcast(RegVT, Arg);
4614 break;
4615 case CCValAssign::Indirect: {
4616 if (isByVal) {
4617 // Memcpy the argument to a temporary stack slot to prevent
4618 // the caller from seeing any modifications the callee may make
4619 // as guaranteed by the `byval` attribute.
4620 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4621 Flags.getByValSize(),
4622 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4623 SDValue StackSlot =
4624 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4625 Chain =
4626 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4627 // From now on treat this as a regular pointer
4628 Arg = StackSlot;
4629 isByVal = false;
4630 } else {
4631 // Store the argument.
4632 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4633 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4634 Chain = DAG.getStore(
4635 Chain, dl, Arg, SpillSlot,
4636 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4637 Arg = SpillSlot;
4638 }
4639 break;
4640 }
4641 }
4642
4643 if (VA.needsCustom()) {
4644 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4645, __extension__
__PRETTY_FUNCTION__))
4645 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4645, __extension__
__PRETTY_FUNCTION__))
;
4646 // Split v64i1 value into two registers
4647 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4648 } else if (VA.isRegLoc()) {
4649 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4650 const TargetOptions &Options = DAG.getTarget().Options;
4651 if (Options.EmitCallSiteInfo)
4652 CSInfo.emplace_back(VA.getLocReg(), I);
4653 if (isVarArg && IsWin64) {
4654 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4655 // shadow reg if callee is a varargs function.
4656 Register ShadowReg;
4657 switch (VA.getLocReg()) {
4658 case X86::XMM0: ShadowReg = X86::RCX; break;
4659 case X86::XMM1: ShadowReg = X86::RDX; break;
4660 case X86::XMM2: ShadowReg = X86::R8; break;
4661 case X86::XMM3: ShadowReg = X86::R9; break;
4662 }
4663 if (ShadowReg)
4664 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4665 }
4666 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4667 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4667, __extension__ __PRETTY_FUNCTION__))
;
4668 if (!StackPtr.getNode())
4669 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4670 getPointerTy(DAG.getDataLayout()));
4671 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4672 dl, DAG, VA, Flags, isByVal));
4673 }
4674 }
4675
4676 if (!MemOpChains.empty())
4677 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4678
4679 if (Subtarget.isPICStyleGOT()) {
4680 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4681 // GOT pointer (except regcall).
4682 if (!isTailCall) {
4683 // Indirect call with RegCall calling convertion may use up all the
4684 // general registers, so it is not suitable to bind EBX reister for
4685 // GOT address, just let register allocator handle it.
4686 if (CallConv != CallingConv::X86_RegCall)
4687 RegsToPass.push_back(std::make_pair(
4688 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4689 getPointerTy(DAG.getDataLayout()))));
4690 } else {
4691 // If we are tail calling and generating PIC/GOT style code load the
4692 // address of the callee into ECX. The value in ecx is used as target of
4693 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4694 // for tail calls on PIC/GOT architectures. Normally we would just put the
4695 // address of GOT into ebx and then call target@PLT. But for tail calls
4696 // ebx would be restored (since ebx is callee saved) before jumping to the
4697 // target@PLT.
4698
4699 // Note: The actual moving to ECX is done further down.
4700 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4701 if (G && !G->getGlobal()->hasLocalLinkage() &&
4702 G->getGlobal()->hasDefaultVisibility())
4703 Callee = LowerGlobalAddress(Callee, DAG);
4704 else if (isa<ExternalSymbolSDNode>(Callee))
4705 Callee = LowerExternalSymbol(Callee, DAG);
4706 }
4707 }
4708
4709 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
4710 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
4711 // From AMD64 ABI document:
4712 // For calls that may call functions that use varargs or stdargs
4713 // (prototype-less calls or calls to functions containing ellipsis (...) in
4714 // the declaration) %al is used as hidden argument to specify the number
4715 // of SSE registers used. The contents of %al do not need to match exactly
4716 // the number of registers, but must be an ubound on the number of SSE
4717 // registers used and is in the range 0 - 8 inclusive.
4718
4719 // Count the number of XMM registers allocated.
4720 static const MCPhysReg XMMArgRegs[] = {
4721 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4722 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4723 };
4724 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4725 assert((Subtarget.hasSSE1() || !NumXMMRegs)(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4726, __extension__
__PRETTY_FUNCTION__))
4726 && "SSE registers cannot be used when SSE is disabled")(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4726, __extension__
__PRETTY_FUNCTION__))
;
4727 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4728 DAG.getConstant(NumXMMRegs, dl,
4729 MVT::i8)));
4730 }
4731
4732 if (isVarArg && IsMustTail) {
4733 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4734 for (const auto &F : Forwards) {
4735 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4736 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4737 }
4738 }
4739
4740 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4741 // don't need this because the eligibility check rejects calls that require
4742 // shuffling arguments passed in memory.
4743 if (!IsSibcall && isTailCall) {
4744 // Force all the incoming stack arguments to be loaded from the stack
4745 // before any new outgoing arguments are stored to the stack, because the
4746 // outgoing stack slots may alias the incoming argument stack slots, and
4747 // the alias isn't otherwise explicit. This is slightly more conservative
4748 // than necessary, because it means that each store effectively depends
4749 // on every argument instead of just those arguments it would clobber.
4750 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4751
4752 SmallVector<SDValue, 8> MemOpChains2;
4753 SDValue FIN;
4754 int FI = 0;
4755 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4756 ++I, ++OutsIndex) {
4757 CCValAssign &VA = ArgLocs[I];
4758
4759 if (VA.isRegLoc()) {
4760 if (VA.needsCustom()) {
4761 assert((CallConv == CallingConv::X86_RegCall) &&(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4762, __extension__
__PRETTY_FUNCTION__))
4762 "Expecting custom case only in regcall calling convention")(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4762, __extension__
__PRETTY_FUNCTION__))
;
4763 // This means that we are in special case where one argument was
4764 // passed through two register locations - Skip the next location
4765 ++I;
4766 }
4767
4768 continue;
4769 }
4770
4771 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4771, __extension__ __PRETTY_FUNCTION__))
;
4772 SDValue Arg = OutVals[OutsIndex];
4773 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4774 // Skip inalloca/preallocated arguments. They don't require any work.
4775 if (Flags.isInAlloca() || Flags.isPreallocated())
4776 continue;
4777 // Create frame index.
4778 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4779 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4780 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4781 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4782
4783 if (Flags.isByVal()) {
4784 // Copy relative to framepointer.
4785 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4786 if (!StackPtr.getNode())
4787 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4788 getPointerTy(DAG.getDataLayout()));
4789 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4790 StackPtr, Source);
4791
4792 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4793 ArgChain,
4794 Flags, DAG, dl));
4795 } else {
4796 // Store relative to framepointer.
4797 MemOpChains2.push_back(DAG.getStore(
4798 ArgChain, dl, Arg, FIN,
4799 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4800 }
4801 }
4802
4803 if (!MemOpChains2.empty())
4804 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4805
4806 // Store the return address to the appropriate stack slot.
4807 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4808 getPointerTy(DAG.getDataLayout()),
4809 RegInfo->getSlotSize(), FPDiff, dl);
4810 }
4811
4812 // Build a sequence of copy-to-reg nodes chained together with token chain
4813 // and flag operands which copy the outgoing args into registers.
4814 SDValue InFlag;
4815 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4816 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4817 RegsToPass[i].second, InFlag);
4818 InFlag = Chain.getValue(1);
4819 }
4820
4821 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4822 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")(static_cast <bool> (Is64Bit && "Large code model is only legal in 64-bit mode."
) ? void (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4822, __extension__
__PRETTY_FUNCTION__))
;
4823 // In the 64-bit large code model, we have to make all calls
4824 // through a register, since the call instruction's 32-bit
4825 // pc-relative offset may not be large enough to hold the whole
4826 // address.
4827 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4828 Callee->getOpcode() == ISD::ExternalSymbol) {
4829 // Lower direct calls to global addresses and external symbols. Setting
4830 // ForCall to true here has the effect of removing WrapperRIP when possible
4831 // to allow direct calls to be selected without first materializing the
4832 // address into a register.
4833 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4834 } else if (Subtarget.isTarget64BitILP32() &&
4835 Callee.getValueType() == MVT::i32) {
4836 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4837 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4838 }
4839
4840 // Returns a chain & a flag for retval copy to use.
4841 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4842 SmallVector<SDValue, 8> Ops;
4843
4844 if (!IsSibcall && isTailCall && !IsMustTail) {
4845 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InFlag, dl);
4846 InFlag = Chain.getValue(1);
4847 }
4848
4849 Ops.push_back(Chain);
4850 Ops.push_back(Callee);
4851
4852 if (isTailCall)
4853 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4854
4855 // Add argument registers to the end of the list so that they are known live
4856 // into the call.
4857 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4858 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4859 RegsToPass[i].second.getValueType()));
4860
4861 // Add a register mask operand representing the call-preserved registers.
4862 const uint32_t *Mask = [&]() {
4863 auto AdaptedCC = CallConv;
4864 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
4865 // use X86_INTR calling convention because it has the same CSR mask
4866 // (same preserved registers).
4867 if (HasNCSR)
4868 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
4869 // If NoCalleeSavedRegisters is requested, than use GHC since it happens
4870 // to use the CSR_NoRegs_RegMask.
4871 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
4872 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
4873 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
4874 }();
4875 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4875, __extension__
__PRETTY_FUNCTION__))
;
4876
4877 // If this is an invoke in a 32-bit function using a funclet-based
4878 // personality, assume the function clobbers all registers. If an exception
4879 // is thrown, the runtime will not restore CSRs.
4880 // FIXME: Model this more precisely so that we can register allocate across
4881 // the normal edge and spill and fill across the exceptional edge.
4882 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4883 const Function &CallerFn = MF.getFunction();
4884 EHPersonality Pers =
4885 CallerFn.hasPersonalityFn()
4886 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4887 : EHPersonality::Unknown;
4888 if (isFuncletEHPersonality(Pers))
4889 Mask = RegInfo->getNoPreservedMask();
4890 }
4891
4892 // Define a new register mask from the existing mask.
4893 uint32_t *RegMask = nullptr;
4894
4895 // In some calling conventions we need to remove the used physical registers
4896 // from the reg mask.
4897 if (shouldDisableCalleeSavedRegisterCC(CallConv) || HasNCSR) {
4898 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4899
4900 // Allocate a new Reg Mask and copy Mask.
4901 RegMask = MF.allocateRegMask();
4902 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4903 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4904
4905 // Make sure all sub registers of the argument registers are reset
4906 // in the RegMask.
4907 for (auto const &RegPair : RegsToPass)
4908 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4909 SubRegs.isValid(); ++SubRegs)
4910 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4911
4912 // Create the RegMask Operand according to our updated mask.
4913 Ops.push_back(DAG.getRegisterMask(RegMask));
4914 } else {
4915 // Create the RegMask Operand according to the static mask.
4916 Ops.push_back(DAG.getRegisterMask(Mask));
4917 }
4918
4919 if (InFlag.getNode())
4920 Ops.push_back(InFlag);
4921
4922 if (isTailCall) {
4923 // We used to do:
4924 //// If this is the first return lowered for this function, add the regs
4925 //// to the liveout set for the function.
4926 // This isn't right, although it's probably harmless on x86; liveouts
4927 // should be computed from returns not tail calls. Consider a void
4928 // function making a tail call to a function returning int.
4929 MF.getFrameInfo().setHasTailCall();
4930 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4931
4932 if (IsCFICall)
4933 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
4934
4935 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4936 return Ret;
4937 }
4938
4939 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4940 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4941 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4942 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
4943 // expanded to the call, directly followed by a special marker sequence and
4944 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
4945 assert(!isTailCall &&(static_cast <bool> (!isTailCall && "tail calls cannot be marked with clang.arc.attachedcall"
) ? void (0) : __assert_fail ("!isTailCall && \"tail calls cannot be marked with clang.arc.attachedcall\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4946, __extension__
__PRETTY_FUNCTION__))
4946 "tail calls cannot be marked with clang.arc.attachedcall")(static_cast <bool> (!isTailCall && "tail calls cannot be marked with clang.arc.attachedcall"
) ? void (0) : __assert_fail ("!isTailCall && \"tail calls cannot be marked with clang.arc.attachedcall\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4946, __extension__
__PRETTY_FUNCTION__))
;
4947 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode")(static_cast <bool> (Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode"
) ? void (0) : __assert_fail ("Is64Bit && \"clang.arc.attachedcall is only supported in 64bit mode\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4947, __extension__
__PRETTY_FUNCTION__))
;
4948
4949 // Add a target global address for the retainRV/claimRV runtime function
4950 // just before the call target.
4951 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
4952 auto PtrVT = getPointerTy(DAG.getDataLayout());
4953 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
4954 Ops.insert(Ops.begin() + 1, GA);
4955 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
4956 } else {
4957 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4958 }
4959
4960 if (IsCFICall)
4961 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
4962
4963 InFlag = Chain.getValue(1);
4964 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4965 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4966
4967 // Save heapallocsite metadata.
4968 if (CLI.CB)
4969 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4970 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4971
4972 // Create the CALLSEQ_END node.
4973 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
4974 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4975 DAG.getTarget().Options.GuaranteedTailCallOpt))
4976 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4977 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
4978 // If this call passes a struct-return pointer, the callee
4979 // pops that struct pointer.
4980 NumBytesForCalleeToPop = 4;
4981
4982 // Returns a flag for retval copy to use.
4983 if (!IsSibcall) {
4984 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
4985 InFlag, dl);
4986 InFlag = Chain.getValue(1);
4987 }
4988
4989 // Handle result values, copying them out of physregs into vregs that we
4990 // return.
4991 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4992 InVals, RegMask);
4993}
4994
4995//===----------------------------------------------------------------------===//
4996// Fast Calling Convention (tail call) implementation
4997//===----------------------------------------------------------------------===//
4998
4999// Like stdcall, this is a callee-cleans-arguments convention, except that
5000// ECX is reserved for storing the tail-called function address. Only 2
5001// registers are free for argument passing (inreg). Tail call optimization is
5002// performed provided:
5003// * tailcallopt is enabled
5004// * caller/callee are fastcc
5005// On X86_64 architecture with GOT-style position independent code only local
5006// (within module) calls are supported at the moment.
5007// To keep the stack aligned according to platform abi the function
5008// GetAlignedArgumentStackSize ensures that argument delta is always multiples
5009// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
5010// If a tail called function callee has more arguments than the caller the
5011// caller needs to make sure that there is room to move the RETADDR to. This is
5012// achieved by reserving an area the size of the argument delta right after the
5013// original RETADDR, but before the saved framepointer or the spilled registers
5014// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
5015// stack layout:
5016// arg1
5017// arg2
5018// RETADDR
5019// [ new RETADDR
5020// move area ]
5021// (possible EBP)
5022// ESI
5023// EDI
5024// local1 ..
5025
5026/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
5027/// requirement.
5028unsigned
5029X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
5030 SelectionDAG &DAG) const {
5031 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
5032 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
5033 assert(StackSize % SlotSize == 0 &&(static_cast <bool> (StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize") ? void (0) : __assert_fail
("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5034, __extension__
__PRETTY_FUNCTION__))
5034 "StackSize must be a multiple of SlotSize")(static_cast <bool> (StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize") ? void (0) : __assert_fail
("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5034, __extension__
__PRETTY_FUNCTION__))
;
5035 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
5036}
5037
5038/// Return true if the given stack call argument is already available in the
5039/// same position (relatively) of the caller's incoming argument stack.
5040static
5041bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
5042 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
5043 const X86InstrInfo *TII, const CCValAssign &VA) {
5044 unsigned Bytes = Arg.getValueSizeInBits() / 8;
5045
5046 for (;;) {
5047 // Look through nodes that don't alter the bits of the incoming value.
5048 unsigned Op = Arg.getOpcode();
5049 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
5050 Arg = Arg.getOperand(0);
5051 continue;
5052 }
5053 if (Op == ISD::TRUNCATE) {
5054 const SDValue &TruncInput = Arg.getOperand(0);
5055 if (TruncInput.getOpcode() == ISD::AssertZext &&
5056 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
5057 Arg.getValueType()) {
5058 Arg = TruncInput.getOperand(0);
5059 continue;
5060 }
5061 }
5062 break;
5063 }
5064
5065 int FI = INT_MAX2147483647;
5066 if (Arg.getOpcode() == ISD::CopyFromReg) {
5067 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
5068 if (!VR.isVirtual())
5069 return false;
5070 MachineInstr *Def = MRI->getVRegDef(VR);
5071 if (!Def)
5072 return false;
5073 if (!Flags.isByVal()) {
5074 if (!TII->isLoadFromStackSlot(*Def, FI))
5075 return false;
5076 } else {
5077 unsigned Opcode = Def->getOpcode();
5078 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
5079 Opcode == X86::LEA64_32r) &&
5080 Def->getOperand(1).isFI()) {
5081 FI = Def->getOperand(1).getIndex();
5082 Bytes = Flags.getByValSize();
5083 } else
5084 return false;
5085 }
5086 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
5087 if (Flags.isByVal())
5088 // ByVal argument is passed in as a pointer but it's now being
5089 // dereferenced. e.g.
5090 // define @foo(%struct.X* %A) {
5091 // tail call @bar(%struct.X* byval %A)
5092 // }
5093 return false;
5094 SDValue Ptr = Ld->getBasePtr();
5095 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
5096 if (!FINode)
5097 return false;
5098 FI = FINode->getIndex();
5099 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
5100 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
5101 FI = FINode->getIndex();
5102 Bytes = Flags.getByValSize();
5103 } else
5104 return false;
5105
5106 assert(FI != INT_MAX)(static_cast <bool> (FI != 2147483647) ? void (0) : __assert_fail
("FI != INT_MAX", "llvm/lib/Target/X86/X86ISelLowering.cpp",
5106, __extension__ __PRETTY_FUNCTION__))
;
5107 if (!MFI.isFixedObjectIndex(FI))
5108 return false;
5109
5110 if (Offset != MFI.getObjectOffset(FI))
5111 return false;
5112
5113 // If this is not byval, check that the argument stack object is immutable.
5114 // inalloca and argument copy elision can create mutable argument stack
5115 // objects. Byval objects can be mutated, but a byval call intends to pass the
5116 // mutated memory.
5117 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
5118 return false;
5119
5120 if (VA.getLocVT().getFixedSizeInBits() >
5121 Arg.getValueSizeInBits().getFixedValue()) {
5122 // If the argument location is wider than the argument type, check that any
5123 // extension flags match.
5124 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
5125 Flags.isSExt() != MFI.isObjectSExt(FI)) {
5126 return false;
5127 }
5128 }
5129
5130 return Bytes == MFI.getObjectSize(FI);
5131}
5132
5133/// Check whether the call is eligible for tail call optimization. Targets
5134/// that want to do tail call optimization should implement this function.
///
/// The function is a sequence of early-outs. After the initial convention,
/// x86_fp80 and Win64 checks, a call for which guaranteed TCO applies (the
/// GuaranteedTailCallOpt option, or a Tail/SwiftTail callee convention) is
/// accepted exactly when canGuaranteeTCO(CalleeCC) holds and the caller and
/// callee conventions match. Every other call has to pass the sibcall checks
/// that follow.
///
/// \param Callee           the call target.
/// \param CalleeCC         calling convention of the callee.
/// \param IsCalleePopSRet  true if the callee pops an sret pointer; such a
///                         call is rejected as a sibcall.
/// \param isVarArg         true for a variadic call.
/// \param RetTy            callee return type, compared with the caller's
///                         return type for the x86_fp80 check.
/// \param Outs             descriptors of the outgoing arguments.
/// \param OutVals          values of the outgoing arguments.
/// \param Ins              descriptors of the call results.
/// \param DAG              the selection DAG being built.
/// \returns true if the call may be lowered as a tail call.
5135bool X86TargetLowering::IsEligibleForTailCallOptimization(
5136 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
5137 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
5138 const SmallVectorImpl<SDValue> &OutVals,
5139 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5140 if (!mayTailCallThisCC(CalleeCC))
5141 return false;
5142
5143 // If -tailcallopt is specified, make fastcc functions tail-callable.
5144 MachineFunction &MF = DAG.getMachineFunction();
5145 const Function &CallerF = MF.getFunction();
5146
5147 // If the function return type is x86_fp80 and the callee return type is not,
5148 // then the FP_EXTEND of the call result is not a nop. It's not safe to
5149 // perform a tailcall optimization here.
5150 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
5151 return false;
5152
5153 CallingConv::ID CallerCC = CallerF.getCallingConv();
5154 bool CCMatch = CallerCC == CalleeCC;
5155 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
5156 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
5157 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
5158 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
5159
5160 // Win64 functions have extra shadow space for argument homing. Don't do the
5161 // sibcall if the caller and callee have mismatched expectations for this
5162 // space.
5163 if (IsCalleeWin64 != IsCallerWin64)
5164 return false;
5165
 // Guaranteed TCO: the decision is final here; none of the sibcall checks
 // below are consulted.
5166 if (IsGuaranteeTCO) {
5167 if (canGuaranteeTCO(CalleeCC) && CCMatch)
5168 return true;
5169 return false;
5170 }
5171
5172 // Look for obvious safe cases to perform tail call optimization that do not
5173 // require ABI changes. This is what gcc calls sibcall.
5174
5175 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
5176 // emit a special epilogue.
5177 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5178 if (RegInfo->hasStackRealignment(MF))
5179 return false;
5180
5181 // Also avoid sibcall optimization if we're an sret return fn and the callee
5182 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
5183 // insufficient.
5184 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
5185 // For a compatible tail call the callee must return our sret pointer. So it
5186 // needs to be (a) an sret function itself and (b) we pass our sret as its
5187 // sret. Condition #b is harder to determine.
5188 return false;
5189 } else if (IsCalleePopSRet)
5190 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
5191 // expect that.
5192 return false;
5193
5194 // Do not sibcall optimize vararg calls unless all arguments are passed via
5195 // registers.
5196 LLVMContext &C = *DAG.getContext();
5197 if (isVarArg && !Outs.empty()) {
5198 // Optimizing for varargs on Win64 is unlikely to be safe without
5199 // additional testing.
5200 if (IsCalleeWin64 || IsCallerWin64)
5201 return false;
5202
5203 SmallVector<CCValAssign, 16> ArgLocs;
5204 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5205
5206 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
5207 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
5208 if (!ArgLocs[i].isRegLoc())
5209 return false;
5210 }
5211
5212 // If the call result is in ST0 / ST1, it needs to be popped off the x87
5213 // stack. Therefore, if it's not used by the call it is not safe to optimize
5214 // this into a sibcall.
5215 bool Unused = false;
5216 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5217 if (!Ins[i].Used) {
5218 Unused = true;
5219 break;
5220 }
5221 }
 // The result locations are only analyzed when at least one result is unused.
5222 if (Unused) {
5223 SmallVector<CCValAssign, 16> RVLocs;
5224 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
5225 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
5226 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5227 CCValAssign &VA = RVLocs[i];
5228 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
5229 return false;
5230 }
5231 }
5232
5233 // Check that the call results are passed in the same way.
5234 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5235 RetCC_X86, RetCC_X86))
5236 return false;
5237 // The callee has to preserve all registers the caller needs to preserve.
5238 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
5239 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5240 if (!CCMatch) {
5241 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5242 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5243 return false;
5244 }
5245
 // Number of stack bytes used by the outgoing arguments; stays 0 when there
 // are no arguments. Consumed by the callee-pop check at the end.
5246 unsigned StackArgsSize = 0;
5247
5248 // If the callee takes no arguments then go on to check the results of the
5249 // call.
5250 if (!Outs.empty()) {
5251 // Check if stack adjustment is needed. For now, do not do this if any
5252 // argument is passed on the stack.
5253 SmallVector<CCValAssign, 16> ArgLocs;
5254 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5255
5256 // Allocate shadow area for Win64
5257 if (IsCalleeWin64)
5258 CCInfo.AllocateStack(32, Align(8));
5259
5260 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
5261 StackArgsSize = CCInfo.getNextStackOffset();
5262
5263 if (CCInfo.getNextStackOffset()) {
5264 // Check if the arguments are already laid out in the right way as
5265 // the caller's fixed stack objects.
5266 MachineFrameInfo &MFI = MF.getFrameInfo();
5267 const MachineRegisterInfo *MRI = &MF.getRegInfo();
5268 const X86InstrInfo *TII = Subtarget.getInstrInfo();
5269 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5270 CCValAssign &VA = ArgLocs[i];
5271 SDValue Arg = OutVals[i];
5272 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5273 if (VA.getLocInfo() == CCValAssign::Indirect)
5274 return false;
5275 if (!VA.isRegLoc()) {
5276 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
5277 MFI, MRI, TII, VA))
5278 return false;
5279 }
5280 }
5281 }
5282
5283 bool PositionIndependent = isPositionIndependent();
5284 // If the tailcall address may be in a register, then make sure it's
5285 // possible to register allocate for it. In 32-bit, the call address can
5286 // only target EAX, EDX, or ECX since the tail call must be scheduled after
5287 // callee-saved registers are restored. These happen to be the same
5288 // registers used to pass 'inreg' arguments so watch out for those.
5289 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
5290 !isa<ExternalSymbolSDNode>(Callee)) ||
5291 PositionIndependent)) {
5292 unsigned NumInRegs = 0;
5293 // In PIC we need an extra register to formulate the address computation
5294 // for the callee.
5295 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
5296
5297 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5298 CCValAssign &VA = ArgLocs[i];
5299 if (!VA.isRegLoc())
5300 continue;
5301 Register Reg = VA.getLocReg();
5302 switch (Reg) {
5303 default: break;
5304 case X86::EAX: case X86::EDX: case X86::ECX:
 // Reject as soon as the arguments occupy MaxInRegs of these
 // registers.
5305 if (++NumInRegs == MaxInRegs)
5306 return false;
5307 break;
5308 }
5309 }
5310 }
5311
 // NOTE(review): presumably checks that arguments placed in callee-saved
 // registers still hold the caller's incoming values - confirm against
 // parametersInCSRMatch.
5312 const MachineRegisterInfo &MRI = MF.getRegInfo();
5313 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5314 return false;
5315 }
5316
5317 bool CalleeWillPop =
5318 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
5319 MF.getTarget().Options.GuaranteedTailCallOpt);
5320
5321 if (unsigned BytesToPop =
5322 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
5323 // If we have bytes to pop, the callee must pop them.
5324 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
5325 if (!CalleePopMatches)
5326 return false;
5327 } else if (CalleeWillPop && StackArgsSize > 0) {
5328 // If we don't have bytes to pop, make sure the callee doesn't pop any.
5329 return false;
5330 }
5331
5332 return true;
5333}
5334
5335FastISel *
5336X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
5337 const TargetLibraryInfo *libInfo) const {
5338 return X86::createFastISel(funcInfo, libInfo);
5339}
5340
5341//===----------------------------------------------------------------------===//
5342// Other Lowering Hooks
5343//===----------------------------------------------------------------------===//
5344
5345bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
5346 bool AssumeSingleUse) {
5347 if (!AssumeSingleUse && !Op.hasOneUse())
5348 return false;
5349 if (!ISD::isNormalLoad(Op.getNode()))
5350 return false;
5351
5352 // If this is an unaligned vector, make sure the target supports folding it.
5353 auto *Ld = cast<LoadSDNode>(Op.getNode());
5354 if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
5355 Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
5356 return false;
5357
5358 // TODO: If this is a non-temporal load and the target has an instruction
5359 // for it, it should not be folded. See "useNonTemporalLoad()".
5360
5361 return true;
5362}
5363
5364bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
5365 const X86Subtarget &Subtarget,
5366 bool AssumeSingleUse) {
5367 assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory")(static_cast <bool> (Subtarget.hasAVX() && "Expected AVX for broadcast from memory"
) ? void (0) : __assert_fail ("Subtarget.hasAVX() && \"Expected AVX for broadcast from memory\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5367, __extension__
__PRETTY_FUNCTION__))
;
5368 if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
5369 return false;
5370
5371 // We can not replace a wide volatile load with a broadcast-from-memory,
5372 // because that would narrow the load, which isn't legal for volatiles.
5373 auto *Ld = cast<LoadSDNode>(Op.getNode());
5374 return !Ld->isVolatile() ||
5375 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
5376}
5377
5378bool X86::mayFoldIntoStore(SDValue Op) {
5379 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
5380}
5381
5382bool X86::mayFoldIntoZeroExtend(SDValue Op) {
5383 if (Op.hasOneUse()) {
5384 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
5385 return (ISD::ZERO_EXTEND == Opcode);
5386 }
5387 return false;
5388}
5389
5390static bool isTargetShuffle(unsigned Opcode) {
5391 switch(Opcode) {
5392 default: return false;
5393 case X86ISD::BLENDI:
5394 case X86ISD::PSHUFB:
5395 case X86ISD::PSHUFD:
5396 case X86ISD::PSHUFHW:
5397 case X86ISD::PSHUFLW:
5398 case X86ISD::SHUFP:
5399 case X86ISD::INSERTPS:
5400 case X86ISD::EXTRQI:
5401 case X86ISD::INSERTQI:
5402 case X86ISD::VALIGN:
5403 case X86ISD::PALIGNR:
5404 case X86ISD::VSHLDQ:
5405 case X86ISD::VSRLDQ:
5406 case X86ISD::MOVLHPS:
5407 case X86ISD::MOVHLPS:
5408 case X86ISD::MOVSHDUP:
5409 case X86ISD::MOVSLDUP:
5410 case X86ISD::MOVDDUP:
5411 case X86ISD::MOVSS:
5412 case X86ISD::MOVSD:
5413 case X86ISD::MOVSH:
5414 case X86ISD::UNPCKL:
5415 case X86ISD::UNPCKH:
5416 case X86ISD::VBROADCAST:
5417 case X86ISD::VPERMILPI:
5418 case X86ISD::VPERMILPV:
5419 case X86ISD::VPERM2X128:
5420 case X86ISD::SHUF128:
5421 case X86ISD::VPERMIL2:
5422 case X86ISD::VPERMI:
5423 case X86ISD::VPPERM:
5424 case X86ISD::VPERMV:
5425 case X86ISD::VPERMV3:
5426 case X86ISD::VZEXT_MOVL:
5427 return true;
5428 }
5429}
5430
5431static bool isTargetShuffleVariableMask(unsigned Opcode) {
5432 switch (Opcode) {
5433 default: return false;
5434 // Target Shuffles.
5435 case X86ISD::PSHUFB:
5436 case X86ISD::VPERMILPV:
5437 case X86ISD::VPERMIL2:
5438 case X86ISD::VPPERM:
5439 case X86ISD::VPERMV:
5440 case X86ISD::VPERMV3:
5441 return true;
5442 // 'Faux' Target Shuffles.
5443 case ISD::OR:
5444 case ISD::AND:
5445 case X86ISD::ANDNP:
5446 return true;
5447 }
5448}
5449
5450SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
5451 MachineFunction &MF = DAG.getMachineFunction();
5452 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5453 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
5454 int ReturnAddrIndex = FuncInfo->getRAIndex();
5455
5456 if (ReturnAddrIndex == 0) {
5457 // Set up a frame object for the return address.
5458 unsigned SlotSize = RegInfo->getSlotSize();
5459 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
5460 -(int64_t)SlotSize,
5461 false);
5462 FuncInfo->setRAIndex(ReturnAddrIndex);
5463 }
5464
5465 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
5466}
5467
5468bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
5469 bool hasSymbolicDisplacement) {
5470 // Offset should fit into 32 bit immediate field.
5471 if (!isInt<32>(Offset))
5472 return false;
5473
5474 // If we don't have a symbolic displacement - we don't have any extra
5475 // restrictions.
5476 if (!hasSymbolicDisplacement)
5477 return true;
5478
5479 // FIXME: Some tweaks might be needed for medium code model.
5480 if (M != CodeModel::Small && M != CodeModel::Kernel)
5481 return false;
5482
5483 // For small code model we assume that latest object is 16MB before end of 31
5484 // bits boundary. We may also accept pretty large negative constants knowing
5485 // that all objects are in the positive half of address space.
5486 if (M == CodeModel::Small && Offset < 16*1024*1024)
5487 return true;
5488
5489 // For kernel code model we know that all object resist in the negative half
5490 // of 32bits address space. We may not accept negative offsets, since they may
5491 // be just off and we may accept pretty large positive ones.
5492 if (M == CodeModel::Kernel && Offset >= 0)
5493 return true;
5494
5495 return false;
5496}
5497
5498/// Determines whether the callee is required to pop its own arguments.
5499/// Callee pop is necessary to support tail calls.
5500bool X86::isCalleePop(CallingConv::ID CallingConv,
5501 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
5502 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
5503 // can guarantee TCO.
5504 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
5505 return true;
5506
5507 switch (CallingConv) {
5508 default:
5509 return false;
5510 case CallingConv::X86_StdCall:
5511 case CallingConv::X86_FastCall:
5512 case CallingConv::X86_ThisCall:
5513 case CallingConv::X86_VectorCall:
5514 return !is64Bit;
5515 }
5516}
5517
5518/// Return true if the condition is an signed comparison operation.
5519static bool isX86CCSigned(unsigned X86CC) {
5520 switch (X86CC) {
5521 default:
5522 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5522)
;
5523 case X86::COND_E:
5524 case X86::COND_NE:
5525 case X86::COND_B:
5526 case X86::COND_A:
5527 case X86::COND_BE:
5528 case X86::COND_AE:
5529 return false;
5530 case X86::COND_G:
5531 case X86::COND_GE:
5532 case X86::COND_L:
5533 case X86::COND_LE:
5534 return true;
5535 }
5536}
5537
5538static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
5539 switch (SetCCOpcode) {
5540 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5540)
;
5541 case ISD::SETEQ: return X86::COND_E;
5542 case ISD::SETGT: return X86::COND_G;
5543 case ISD::SETGE: return X86::COND_GE;
5544 case ISD::SETLT: return X86::COND_L;
5545 case ISD::SETLE: return X86::COND_LE;
5546 case ISD::SETNE: return X86::COND_NE;
5547 case ISD::SETULT: return X86::COND_B;
5548 case ISD::SETUGT: return X86::COND_A;
5549 case ISD::SETULE: return X86::COND_BE;
5550 case ISD::SETUGE: return X86::COND_AE;
5551 }
5552}
5553
5554/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
5555/// condition code, returning the condition code and the LHS/RHS of the
5556/// comparison to make.
5557static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
5558 bool isFP, SDValue &LHS, SDValue &RHS,
5559 SelectionDAG &DAG) {
5560 if (!isFP) {
5561 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5562 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
5563 // X > -1 -> X == 0, jump !sign.
5564 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5565 return X86::COND_NS;
5566 }
5567 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
5568 // X < 0 -> X == 0, jump on sign.
5569 return X86::COND_S;
5570 }
5571 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
5572 // X >= 0 -> X == 0, jump on !sign.
5573 return X86::COND_NS;
5574 }
5575 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5576 // X < 1 -> X <= 0
5577 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5578 return X86::COND_LE;
5579 }
5580 }
5581
5582 return TranslateIntegerX86CC(SetCCOpcode);
5583 }
5584
5585 // First determine if it is required or is profitable to flip the operands.
5586
5587 // If LHS is a foldable load, but RHS is not, flip the condition.
5588 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5589 !ISD::isNON_EXTLoad(RHS.getNode())) {
5590 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5591 std::swap(LHS, RHS);
5592 }
5593
5594 switch (SetCCOpcode) {
5595 default: break;
5596 case ISD::SETOLT:
5597 case ISD::SETOLE:
5598 case ISD::SETUGT:
5599 case ISD::SETUGE:
5600 std::swap(LHS, RHS);
5601 break;
5602 }
5603
5604 // On a floating point condition, the flags are set as follows:
5605 // ZF PF CF op
5606 // 0 | 0 | 0 | X > Y
5607 // 0 | 0 | 1 | X < Y
5608 // 1 | 0 | 0 | X == Y
5609 // 1 | 1 | 1 | unordered
5610 switch (SetCCOpcode) {
5611 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5611)
;
5612 case ISD::SETUEQ:
5613 case ISD::SETEQ: return X86::COND_E;
5614 case ISD::SETOLT: // flipped
5615 case ISD::SETOGT:
5616 case ISD::SETGT: return X86::COND_A;
5617 case ISD::SETOLE: // flipped
5618 case ISD::SETOGE:
5619 case ISD::SETGE: return X86::COND_AE;
5620 case ISD::SETUGT: // flipped
5621 case ISD::SETULT:
5622 case ISD::SETLT: return X86::COND_B;
5623 case ISD::SETUGE: // flipped
5624 case ISD::SETULE:
5625 case ISD::SETLE: return X86::COND_BE;
5626 case ISD::SETONE:
5627 case ISD::SETNE: return X86::COND_NE;
5628 case ISD::SETUO: return X86::COND_P;
5629 case ISD::SETO: return X86::COND_NP;
5630 case ISD::SETOEQ:
5631 case ISD::SETUNE: return X86::COND_INVALID;
5632 }
5633}
5634
5635/// Is there a floating point cmov for the specific X86 condition code?
5636/// Current x86 isa includes the following FP cmov instructions:
5637/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5638static bool hasFPCMov(unsigned X86CC) {
5639 switch (X86CC) {
5640 default:
5641 return false;
5642 case X86::COND_B:
5643 case X86::COND_BE:
5644 case X86::COND_E:
5645 case X86::COND_P:
5646 case X86::COND_A:
5647 case X86::COND_AE:
5648 case X86::COND_NE:
5649 case X86::COND_NP:
5650 return true;
5651 }
5652}
5653
5654static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
5655 return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
5656 VT.is512BitVector();
5657}
5658
// Describes the memory access performed by the x86 intrinsic call `I` by
// filling in `Info` (opcode, pointer operand, memory VT, alignment and
// load/store flags).
//
// Returns true when `Info` has been populated for `Intrinsic`, and false when
// the intrinsic is not handled by either of the two switches below.
5659bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5660 const CallInst &I,
5661 MachineFunction &MF,
5662 unsigned Intrinsic) const {
// Start from a neutral description; each case below only adds to it.
5663 Info.flags = MachineMemOperand::MONone;
5664 Info.offset = 0;
5665
// Intrinsics without a getIntrinsicWithChain() entry are matched directly on
// their intrinsic ID; everything else is dispatched on IntrData->Type further
// down.
5666 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5667 if (!IntrData) {
5668 switch (Intrinsic) {
// AES Key Locker style intrinsics: a byte-aligned load through one of the
// pointer arguments.
// NOTE(review): 48 / 64 are passed to getIntegerVT as a bit width. If these
// numbers were meant as a size in bytes, memVT is too narrow - confirm.
5669 case Intrinsic::x86_aesenc128kl:
5670 case Intrinsic::x86_aesdec128kl:
5671 Info.opc = ISD::INTRINSIC_W_CHAIN;
5672 Info.ptrVal = I.getArgOperand(1);
5673 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5674 Info.align = Align(1);
5675 Info.flags |= MachineMemOperand::MOLoad;
5676 return true;
5677 case Intrinsic::x86_aesenc256kl:
5678 case Intrinsic::x86_aesdec256kl:
5679 Info.opc = ISD::INTRINSIC_W_CHAIN;
5680 Info.ptrVal = I.getArgOperand(1);
5681 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5682 Info.align = Align(1);
5683 Info.flags |= MachineMemOperand::MOLoad;
5684 return true;
// The "wide" variants take the pointer as argument 0 instead of argument 1.
5685 case Intrinsic::x86_aesencwide128kl:
5686 case Intrinsic::x86_aesdecwide128kl:
5687 Info.opc = ISD::INTRINSIC_W_CHAIN;
5688 Info.ptrVal = I.getArgOperand(0);
5689 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5690 Info.align = Align(1);
5691 Info.flags |= MachineMemOperand::MOLoad;
5692 return true;
5693 case Intrinsic::x86_aesencwide256kl:
5694 case Intrinsic::x86_aesdecwide256kl:
5695 Info.opc = ISD::INTRINSIC_W_CHAIN;
5696 Info.ptrVal = I.getArgOperand(0);
5697 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5698 Info.align = Align(1);
5699 Info.flags |= MachineMemOperand::MOLoad;
5700 return true;
// Volatile read-modify-write through argument 0; the access width is the
// scalar bit width of the call's result type.
// NOTE(review): Size is a bit count but is passed unchanged to Align(), which
// presumably expects bytes (e.g. 32 instead of 4 for an i32) - confirm
// whether Align(Size / 8) was intended. The same pattern repeats in the next
// two case groups.
5701 case Intrinsic::x86_cmpccxadd32:
5702 case Intrinsic::x86_cmpccxadd64:
5703 case Intrinsic::x86_atomic_bts:
5704 case Intrinsic::x86_atomic_btc:
5705 case Intrinsic::x86_atomic_btr: {
5706 Info.opc = ISD::INTRINSIC_W_CHAIN;
5707 Info.ptrVal = I.getArgOperand(0);
5708 unsigned Size = I.getType()->getScalarSizeInBits();
5709 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5710 Info.align = Align(Size);
5711 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5712 MachineMemOperand::MOVolatile;
5713 return true;
5714 }
// Same as above, except that the access width comes from the type of
// argument 1 rather than from the result type.
5715 case Intrinsic::x86_atomic_bts_rm:
5716 case Intrinsic::x86_atomic_btc_rm:
5717 case Intrinsic::x86_atomic_btr_rm: {
5718 Info.opc = ISD::INTRINSIC_W_CHAIN;
5719 Info.ptrVal = I.getArgOperand(0);
5720 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
5721 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5722 Info.align = Align(Size);
5723 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5724 MachineMemOperand::MOVolatile;
5725 return true;
5726 }
// Body identical to the *_rm group above: volatile load+store through
// argument 0, width taken from argument 1.
5727 case Intrinsic::x86_aadd32:
5728 case Intrinsic::x86_aadd64:
5729 case Intrinsic::x86_aand32:
5730 case Intrinsic::x86_aand64:
5731 case Intrinsic::x86_aor32:
5732 case Intrinsic::x86_aor64:
5733 case Intrinsic::x86_axor32:
5734 case Intrinsic::x86_axor64:
5735 case Intrinsic::x86_atomic_add_cc:
5736 case Intrinsic::x86_atomic_sub_cc:
5737 case Intrinsic::x86_atomic_or_cc:
5738 case Intrinsic::x86_atomic_and_cc:
5739 case Intrinsic::x86_atomic_xor_cc: {
5740 Info.opc = ISD::INTRINSIC_W_CHAIN;
5741 Info.ptrVal = I.getArgOperand(0);
5742 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
5743 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5744 Info.align = Align(Size);
5745 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5746 MachineMemOperand::MOVolatile;
5747 return true;
5748 }
5749 }
// No table entry and not one of the IDs above: nothing to describe.
5750 return false;
5751 }
5752
// Table-driven intrinsics: dispatch on the kind recorded in the table entry.
5753 switch (IntrData->Type) {
// Truncating stores: a byte-aligned store through argument 0. The memory type
// has as many elements as the vector in argument 1, with the element type
// selected by the TRUNCATE_TO_MEM_VI* kind.
5754 case TRUNCATE_TO_MEM_VI8:
5755 case TRUNCATE_TO_MEM_VI16:
5756 case TRUNCATE_TO_MEM_VI32: {
5757 Info.opc = ISD::INTRINSIC_VOID;
5758 Info.ptrVal = I.getArgOperand(0);
5759 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5760 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5761 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5762 ScalarVT = MVT::i8;
5763 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5764 ScalarVT = MVT::i16;
5765 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5766 ScalarVT = MVT::i32;
5767
5768 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5769 Info.align = Align(1);
5770 Info.flags |= MachineMemOperand::MOStore;
5771 break;
5772 }
// Gathers: no single pointer value is recorded (ptrVal is null). The memory
// type uses the result's element type and the smaller of the data and index
// (argument 2) element counts.
5773 case GATHER:
5774 case GATHER_AVX2: {
5775 Info.opc = ISD::INTRINSIC_W_CHAIN;
5776 Info.ptrVal = nullptr;
5777 MVT DataVT = MVT::getVT(I.getType());
5778 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5779 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5780 IndexVT.getVectorNumElements());
5781 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5782 Info.align = Align(1);
5783 Info.flags |= MachineMemOperand::MOLoad;
5784 break;
5785 }
// Scatters: mirror image of the gather case, with the data vector taken from
// argument 3 and marked as a store.
5786 case SCATTER: {
5787 Info.opc = ISD::INTRINSIC_VOID;
5788 Info.ptrVal = nullptr;
5789 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5790 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5791 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5792 IndexVT.getVectorNumElements());
5793 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5794 Info.align = Align(1);
5795 Info.flags |= MachineMemOperand::MOStore;
5796 break;
5797 }
// Any other table kind is not described here.
5798 default:
5799 return false;
5800 }
5801
5802 return true;
5803}
5804
5805/// Returns true if the target can instruction select the
5806/// specified FP immediate natively. If false, the legalizer will
5807/// materialize the FP immediate as a load from a constant pool.
5808bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5809 bool ForCodeSize) const {
5810 for (const APFloat &FPImm : LegalFPImmediates)
5811 if (Imm.bitwiseIsEqual(FPImm))
5812 return true;
5813 return false;
5814}
5815
5816bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5817 ISD::LoadExtType ExtTy,
5818 EVT NewVT) const {
5819 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")(static_cast <bool> (cast<LoadSDNode>(Load)->isSimple
() && "illegal to narrow") ? void (0) : __assert_fail
("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5819, __extension__
__PRETTY_FUNCTION__))
;
5820
5821 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5822 // relocation target a movq or addq instruction: don't let the load shrink.
5823 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5824 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5825 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5826 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5827
5828 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5829 // those uses are extracted directly into a store, then the extract + store
5830 // can be store-folded. Therefore, it's probably not worth splitting the load.
5831 EVT VT = Load->getValueType(0);
5832 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5833 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5834 // Skip uses of the chain value. Result 0 of the node is the load value.
5835 if (UI.getUse().getResNo() != 0)
5836 continue;
5837
5838 // If this use is not an extract + store, it's probably worth splitting.
5839 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5840 UI->use_begin()->getOpcode() != ISD::STORE)
5841 return true;
5842 }
5843 // All non-chain uses are extract + store.
5844 return false;
5845 }
5846
5847 return true;
5848}
5849
5850/// Returns true if it is beneficial to convert a load of a constant
5851/// to just the constant itself.
5852bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5853 Type *Ty) const {
5854 assert(Ty->isIntegerTy())(static_cast <bool> (Ty->isIntegerTy()) ? void (0) :
__assert_fail ("Ty->isIntegerTy()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5854, __extension__ __PRETTY_FUNCTION__))
;
5855
5856 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5857 if (BitSize == 0 || BitSize > 64)
5858 return false;
5859 return true;
5860}
5861
5862bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5863 // If we are using XMM registers in the ABI and the condition of the select is
5864 // a floating-point compare and we have blendv or conditional move, then it is
5865 // cheaper to select instead of doing a cross-register move and creating a
5866 // load that depends on the compare result.
5867 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5868 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5869}
5870
5871bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5872 // TODO: It might be a win to ease or lift this restriction, but the generic
5873 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5874 if (VT.isVector() && Subtarget.hasAVX512())
5875 return false;
5876
5877 return true;
5878}
5879
5880bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5881 SDValue C) const {
5882 // TODO: We handle scalars using custom code, but generic combining could make
5883 // that unnecessary.
5884 APInt MulC;
5885 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5886 return false;
5887
5888 // Find the type this will be legalized too. Otherwise we might prematurely
5889 // convert this to shl+add/sub and then still have to type legalize those ops.
5890 // Another choice would be to defer the decision for illegal types until
5891 // after type legalization. But constant splat vectors of i64 can't make it
5892 // through type legalization on 32-bit targets so we would need to special
5893 // case vXi64.
5894 while (getTypeAction(Context, VT) != TypeLegal)
5895 VT = getTypeToTransformTo(Context, VT);
5896
5897 // If vector multiply is legal, assume that's faster than shl + add/sub.
5898 // Multiply is a complex op with higher latency and lower throughput in
5899 // most implementations, sub-vXi32 vector multiplies are always fast,
5900 // vXi32 mustn't have a SlowMULLD implementation, and anything larger (vXi64)
5901 // is always going to be slow.
5902 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5903 if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&
5904 (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
5905 return false;
5906
5907 // shl+add, shl+sub, shl+add+neg
5908 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5909 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5910}
5911
5912bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5913 unsigned Index) const {
5914 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5915 return false;
5916
5917 // Mask vectors support all subregister combinations and operations that
5918 // extract half of vector.
5919 if (ResVT.getVectorElementType() == MVT::i1)
5920 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5921 (Index == ResVT.getVectorNumElements()));
5922
5923 return (Index % ResVT.getVectorNumElements()) == 0;
5924}
5925
5926bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5927 unsigned Opc = VecOp.getOpcode();
5928
5929 // Assume target opcodes can't be scalarized.
5930 // TODO - do we have any exceptions?
5931 if (Opc >= ISD::BUILTIN_OP_END)
5932 return false;
5933
5934 // If the vector op is not supported, try to convert to scalar.
5935 EVT VecVT = VecOp.getValueType();
5936 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5937 return true;
5938
5939 // If the vector op is supported, but the scalar op is not, the transform may
5940 // not be worthwhile.
5941 EVT ScalarVT = VecVT.getScalarType();
5942 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5943}
5944
5945bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5946 bool) const {
5947 // TODO: Allow vectors?
5948 if (VT.isVector())