Bug Summary

File:build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/X86/X86ISelLowering.cpp
Warning:line 13096, column 55
The result of the '%' expression is undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/X86 -I /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/X86 -I include -I /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem 
/usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-09-04-125545-48738-1 -x c++ /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/X86/X86ISelLowering.cpp
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/IntrinsicLowering.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineJumpTableInfo.h"
39#include "llvm/CodeGen/MachineLoopInfo.h"
40#include "llvm/CodeGen/MachineModuleInfo.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/TargetLowering.h"
43#include "llvm/CodeGen/WinEHFuncInfo.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DerivedTypes.h"
47#include "llvm/IR/DiagnosticInfo.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalVariable.h"
51#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Instructions.h"
53#include "llvm/IR/Intrinsics.h"
54#include "llvm/IR/PatternMatch.h"
55#include "llvm/MC/MCAsmInfo.h"
56#include "llvm/MC/MCContext.h"
57#include "llvm/MC/MCExpr.h"
58#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/KnownBits.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Target/TargetOptions.h"
65#include <algorithm>
66#include <bitset>
67#include <cctype>
68#include <numeric>
69using namespace llvm;
70
71#define DEBUG_TYPE"x86-isel" "x86-isel"
72
73STATISTIC(NumTailCalls, "Number of tail calls")static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls"
, "Number of tail calls"}
; // Counter "NumTailCalls" ("Number of tail calls"); the text following STATISTIC(...) appears to be the report's rendering of the macro expansion.
74
75static cl::opt<int> ExperimentalPrefInnermostLoopAlignment( // Integer option; value is a log2 byte count per its description.
76 "x86-experimental-pref-innermost-loop-alignment", cl::init(4), // Initial value is 4.
77 cl::desc(
78 "Sets the preferable loop alignment for experiments (as log2 bytes) "
79 "for innermost loops only. If specified, this option overrides "
80 "alignment set by x86-experimental-pref-loop-alignment."),
81 cl::Hidden); // Declared with the cl::Hidden flag.
82
83static cl::opt<bool> MulConstantOptimization( // Boolean switch for the 'mul x, Const' replacement described below.
84 "mul-constant-optimization", cl::init(true), // Initial value is true.
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden); // Declared with the cl::Hidden flag.
88
89static cl::opt<bool> ExperimentalUnorderedISEL( // Boolean switch for how unordered atomic loads/stores are represented (see description).
90 "x86-experimental-unordered-atomic-isel", cl::init(false), // Initial value is false.
91 cl::desc("Use LoadSDNode and StoreSDNode instead of "
92 "AtomicSDNode for unordered atomic loads and "
93 "stores respectively."),
94 cl::Hidden); // Declared with the cl::Hidden flag.
95
96/// Call this when the user attempts to do something unsupported, like
97/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
98/// report_fatal_error, so calling code should attempt to recover without
99/// crashing. The diagnostic is handed to DAG.getContext()->diagnose().
100static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
101 const char *Msg) { // Msg is the text of the diagnostic.
102 MachineFunction &MF = DAG.getMachineFunction(); // Used only to obtain the function being reported on.
103 DAG.getContext()->diagnose( // The diagnostic object is a temporary built inside this call.
104 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc())); // dl supplies the debug location.
105}
106
107X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
108 const X86Subtarget &STI)
109 : TargetLowering(TM), Subtarget(STI) {
110 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
111 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
112
113 // Set up the TargetLowering object.
114
115 // X86 is weird. It always uses i8 for shift amounts and setcc results.
116 setBooleanContents(ZeroOrOneBooleanContent);
117 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
118 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
119
120 // For 64-bit, since we have so many registers, use the ILP scheduler.
121 // For 32-bit, use the register pressure specific scheduling.
122 // For Atom, always use ILP scheduling.
123 if (Subtarget.isAtom())
124 setSchedulingPreference(Sched::ILP);
125 else if (Subtarget.is64Bit())
126 setSchedulingPreference(Sched::ILP);
127 else
128 setSchedulingPreference(Sched::RegPressure);
129 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
130 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
131
132 // Bypass expensive divides and use cheaper ones.
133 if (TM.getOptLevel() >= CodeGenOpt::Default) {
134 if (Subtarget.hasSlowDivide32())
135 addBypassSlowDiv(32, 8);
136 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
137 addBypassSlowDiv(64, 32);
138 }
139
140 // Setup Windows compiler runtime calls.
141 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
142 static const struct {
143 const RTLIB::Libcall Op;
144 const char * const Name;
145 const CallingConv::ID CC;
146 } LibraryCalls[] = {
147 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
148 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
149 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
150 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
151 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
152 };
153
154 for (const auto &LC : LibraryCalls) {
155 setLibcallName(LC.Op, LC.Name);
156 setLibcallCallingConv(LC.Op, LC.CC);
157 }
158 }
159
160 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
161 // MSVCRT doesn't have powi; fall back to pow
162 setLibcallName(RTLIB::POWI_F32, nullptr);
163 setLibcallName(RTLIB::POWI_F64, nullptr);
164 }
165
166 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
167 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
168 // FIXME: Should we be limiting the atomic size on other configs? Default is
169 // 1024.
170 if (!Subtarget.canUseCMPXCHG8B())
171 setMaxAtomicSizeInBitsSupported(32);
172
173 // Set up the register classes.
174 addRegisterClass(MVT::i8, &X86::GR8RegClass);
175 addRegisterClass(MVT::i16, &X86::GR16RegClass);
176 addRegisterClass(MVT::i32, &X86::GR32RegClass);
177 if (Subtarget.is64Bit())
178 addRegisterClass(MVT::i64, &X86::GR64RegClass);
179
180 for (MVT VT : MVT::integer_valuetypes())
181 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
182
183 // We don't accept any truncstore of integer registers.
184 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
185 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
186 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
187 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
188 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
189 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
190
191 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
192
193 // SETOEQ and SETUNE require checking two conditions.
194 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
195 setCondCodeAction(ISD::SETOEQ, VT, Expand);
196 setCondCodeAction(ISD::SETUNE, VT, Expand);
197 }
198
199 // Integer absolute.
200 if (Subtarget.canUseCMOV()) {
201 setOperationAction(ISD::ABS , MVT::i16 , Custom);
202 setOperationAction(ISD::ABS , MVT::i32 , Custom);
203 if (Subtarget.is64Bit())
204 setOperationAction(ISD::ABS , MVT::i64 , Custom);
205 }
206
207 // Signed saturation subtraction.
208 setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
209 setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
210 setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
211 if (Subtarget.is64Bit())
212 setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
213
214 // Funnel shifts.
215 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
216 // For slow shld targets we only lower for code size.
217 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
218
219 setOperationAction(ShiftOp , MVT::i8 , Custom);
220 setOperationAction(ShiftOp , MVT::i16 , Custom);
221 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
222 if (Subtarget.is64Bit())
223 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
224 }
225
226 if (!Subtarget.useSoftFloat()) {
227 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
228 // operation.
229 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
230 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
231 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
232 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
233 // We have an algorithm for SSE2, and we turn this into a 64-bit
234 // FILD or VCVTUSI2SS/SD for other targets.
235 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
236 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
237 // We have an algorithm for SSE2->double, and we turn this into a
238 // 64-bit FILD followed by conditional FADD for other targets.
239 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
240 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
241
242 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
243 // this operation.
244 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
245 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
246 // SSE has no i16 to fp conversion, only i32. We promote in the handler
247 // to allow f80 to use i16 and f64 to use i16 with sse1 only
248 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
249 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
250 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
251 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
252 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
253 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
254 // are Legal, f80 is custom lowered.
255 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
256 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
257
258 // Promote i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
259 // this operation.
260 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
261 // FIXME: This doesn't generate invalid exception when it should. PR44019.
262 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
263 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
264 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
265 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
266 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
267 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
268 // are Legal, f80 is custom lowered.
269 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
270 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
271
272 // Handle FP_TO_UINT by promoting the destination to a larger signed
273 // conversion.
274 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
275 // FIXME: This doesn't generate invalid exception when it should. PR44019.
276 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
277 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
278 // FIXME: This doesn't generate invalid exception when it should. PR44019.
279 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
280 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
281 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
282 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
283 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
284
285 setOperationAction(ISD::LRINT, MVT::f32, Custom);
286 setOperationAction(ISD::LRINT, MVT::f64, Custom);
287 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
288 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
289
290 if (!Subtarget.is64Bit()) {
291 setOperationAction(ISD::LRINT, MVT::i64, Custom);
292 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
293 }
294 }
295
296 if (Subtarget.hasSSE2()) {
297 // Custom lowering for saturating float to int conversions.
298 // We handle promotion to larger result types manually.
299 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
300 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
301 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
302 }
303 if (Subtarget.is64Bit()) {
304 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
305 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
306 }
307 }
308
309 // Handle address space casts between mixed sized pointers.
310 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
311 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
312
313 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
314 if (!Subtarget.hasSSE2()) {
315 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
316 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
317 if (Subtarget.is64Bit()) {
318 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
319 // Without SSE, i64->f64 goes through memory.
320 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
321 }
322 } else if (!Subtarget.is64Bit())
323 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
324
325 // Scalar integer divide and remainder are lowered to use operations that
326 // produce two results, to match the available instructions. This exposes
327 // the two-result form to trivial CSE, which is able to combine x/y and x%y
328 // into a single instruction.
329 //
330 // Scalar integer multiply-high is also lowered to use two-result
331 // operations, to match the available instructions. However, plain multiply
332 // (low) operations are left as Legal, as there are single-result
333 // instructions for this in x86. Using the two-result multiply instructions
334 // when both high and low results are needed must be arranged by dagcombine.
335 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
336 setOperationAction(ISD::MULHS, VT, Expand);
337 setOperationAction(ISD::MULHU, VT, Expand);
338 setOperationAction(ISD::SDIV, VT, Expand);
339 setOperationAction(ISD::UDIV, VT, Expand);
340 setOperationAction(ISD::SREM, VT, Expand);
341 setOperationAction(ISD::UREM, VT, Expand);
342 }
343
344 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
345 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
346 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
347 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
348 setOperationAction(ISD::BR_CC, VT, Expand);
349 setOperationAction(ISD::SELECT_CC, VT, Expand);
350 }
351 if (Subtarget.is64Bit())
352 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
353 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
354 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
355 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
356
357 setOperationAction(ISD::FREM , MVT::f32 , Expand);
358 setOperationAction(ISD::FREM , MVT::f64 , Expand);
359 setOperationAction(ISD::FREM , MVT::f80 , Expand);
360 setOperationAction(ISD::FREM , MVT::f128 , Expand);
361
362 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
363 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
364 setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
365 }
366
367 // Promote the i8 variants and force them on up to i32 which has a shorter
368 // encoding.
369 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
370 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
371 // Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
372 // a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
373 // promote that too.
374 setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
375 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32);
376
377 if (!Subtarget.hasBMI()) {
378 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
379 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
380 if (Subtarget.is64Bit()) {
381 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
382 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
383 }
384 }
385
386 if (Subtarget.hasLZCNT()) {
387 // When promoting the i8 variants, force them to i32 for a shorter
388 // encoding.
389 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
390 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
391 } else {
392 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
393 if (VT == MVT::i64 && !Subtarget.is64Bit())
394 continue;
395 setOperationAction(ISD::CTLZ , VT, Custom);
396 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
397 }
398 }
399
400 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
401 ISD::STRICT_FP_TO_FP16}) {
402 // Special handling for half-precision floating point conversions.
403 // If we don't have F16C support, then lower half float conversions
404 // into library calls.
405 setOperationAction(
406 Op, MVT::f32,
407 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
408 // There's never any support for operations beyond MVT::f32.
409 setOperationAction(Op, MVT::f64, Expand);
410 setOperationAction(Op, MVT::f80, Expand);
411 setOperationAction(Op, MVT::f128, Expand);
412 }
413
414 for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
415 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
416 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
417 setTruncStoreAction(VT, MVT::f16, Expand);
418 setTruncStoreAction(VT, MVT::bf16, Expand);
419
420 setOperationAction(ISD::BF16_TO_FP, VT, Expand);
421 setOperationAction(ISD::FP_TO_BF16, VT, Custom);
422 }
423
424 setOperationAction(ISD::PARITY, MVT::i8, Custom);
425 setOperationAction(ISD::PARITY, MVT::i16, Custom);
426 setOperationAction(ISD::PARITY, MVT::i32, Custom);
427 if (Subtarget.is64Bit())
428 setOperationAction(ISD::PARITY, MVT::i64, Custom);
429 if (Subtarget.hasPOPCNT()) {
430 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
431 // popcntw is longer to encode than popcntl and also has a false dependency
432 // on the dest that popcntl hasn't had since Cannon Lake.
433 setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
434 } else {
435 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
436 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
437 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
438 if (Subtarget.is64Bit())
439 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
440 else
441 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
442 }
443
444 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
445
446 if (!Subtarget.hasMOVBE())
447 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
448
449 // X86 wants to expand cmov itself.
450 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
451 setOperationAction(ISD::SELECT, VT, Custom);
452 setOperationAction(ISD::SETCC, VT, Custom);
453 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
454 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
455 }
456 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
457 if (VT == MVT::i64 && !Subtarget.is64Bit())
458 continue;
459 setOperationAction(ISD::SELECT, VT, Custom);
460 setOperationAction(ISD::SETCC, VT, Custom);
461 }
462
463 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
464 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
465 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
466
467 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
468 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
469 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
470 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
471 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
472 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
473 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
474 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
475
476 // Darwin ABI issue.
477 for (auto VT : { MVT::i32, MVT::i64 }) {
478 if (VT == MVT::i64 && !Subtarget.is64Bit())
479 continue;
480 setOperationAction(ISD::ConstantPool , VT, Custom);
481 setOperationAction(ISD::JumpTable , VT, Custom);
482 setOperationAction(ISD::GlobalAddress , VT, Custom);
483 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
484 setOperationAction(ISD::ExternalSymbol , VT, Custom);
485 setOperationAction(ISD::BlockAddress , VT, Custom);
486 }
487
488 // 64-bit shl, sra, srl (iff 32-bit x86)
489 for (auto VT : { MVT::i32, MVT::i64 }) {
490 if (VT == MVT::i64 && !Subtarget.is64Bit())
491 continue;
492 setOperationAction(ISD::SHL_PARTS, VT, Custom);
493 setOperationAction(ISD::SRA_PARTS, VT, Custom);
494 setOperationAction(ISD::SRL_PARTS, VT, Custom);
495 }
496
497 if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
498 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
499
500 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
501
502 // Expand certain atomics
503 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
504 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
505 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
506 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
507 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
508 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
509 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
510 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
511 }
512
513 if (!Subtarget.is64Bit())
514 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
515
516 if (Subtarget.canUseCMPXCHG16B())
517 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
518
519 // FIXME - use subtarget debug flags
520 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
521 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
522 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
523 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
524 }
525
526 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
527 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
528
529 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
530 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
531
532 setOperationAction(ISD::TRAP, MVT::Other, Legal);
533 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
534 if (Subtarget.isTargetPS())
535 setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
536 else
537 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
538
539 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
540 setOperationAction(ISD::VASTART , MVT::Other, Custom);
541 setOperationAction(ISD::VAEND , MVT::Other, Expand);
542 bool Is64Bit = Subtarget.is64Bit();
543 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
544 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
545
546 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
547 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
548
549 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
550
551 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
552 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
553 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
554
555 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
556
557 auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
558 setOperationAction(ISD::FABS, VT, Action);
559 setOperationAction(ISD::FNEG, VT, Action);
560 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
561 setOperationAction(ISD::FREM, VT, Action);
562 setOperationAction(ISD::FMA, VT, Action);
563 setOperationAction(ISD::FMINNUM, VT, Action);
564 setOperationAction(ISD::FMAXNUM, VT, Action);
565 setOperationAction(ISD::FMINIMUM, VT, Action);
566 setOperationAction(ISD::FMAXIMUM, VT, Action);
567 setOperationAction(ISD::FSIN, VT, Action);
568 setOperationAction(ISD::FCOS, VT, Action);
569 setOperationAction(ISD::FSINCOS, VT, Action);
570 setOperationAction(ISD::FSQRT, VT, Action);
571 setOperationAction(ISD::FPOW, VT, Action);
572 setOperationAction(ISD::FLOG, VT, Action);
573 setOperationAction(ISD::FLOG2, VT, Action);
574 setOperationAction(ISD::FLOG10, VT, Action);
575 setOperationAction(ISD::FEXP, VT, Action);
576 setOperationAction(ISD::FEXP2, VT, Action);
577 setOperationAction(ISD::FCEIL, VT, Action);
578 setOperationAction(ISD::FFLOOR, VT, Action);
579 setOperationAction(ISD::FNEARBYINT, VT, Action);
580 setOperationAction(ISD::FRINT, VT, Action);
581 setOperationAction(ISD::BR_CC, VT, Action);
582 setOperationAction(ISD::SETCC, VT, Action);
583 setOperationAction(ISD::SELECT, VT, Custom);
584 setOperationAction(ISD::SELECT_CC, VT, Action);
585 setOperationAction(ISD::FROUND, VT, Action);
586 setOperationAction(ISD::FROUNDEVEN, VT, Action);
587 setOperationAction(ISD::FTRUNC, VT, Action);
588 };
589
590 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
591 // f16, f32 and f64 use SSE.
592 // Set up the FP register classes.
593 addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
594 : &X86::FR16RegClass);
595 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
596 : &X86::FR32RegClass);
597 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
598 : &X86::FR64RegClass);
599
600 // Disable f32->f64 extload as we can only generate this in one instruction
601 // under optsize. So it's easier to pattern match (fpext (load)) for that
602 // case instead of needing to emit 2 instructions for extload in the
603 // non-optsize case.
604 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
605
606 for (auto VT : { MVT::f32, MVT::f64 }) {
607 // Use ANDPD to simulate FABS.
608 setOperationAction(ISD::FABS, VT, Custom);
609
610 // Use XORP to simulate FNEG.
611 setOperationAction(ISD::FNEG, VT, Custom);
612
613 // Use ANDPD and ORPD to simulate FCOPYSIGN.
614 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
615
616 // These might be better off as horizontal vector ops.
617 setOperationAction(ISD::FADD, VT, Custom);
618 setOperationAction(ISD::FSUB, VT, Custom);
619
620 // We don't support sin/cos/fmod
621 setOperationAction(ISD::FSIN , VT, Expand);
622 setOperationAction(ISD::FCOS , VT, Expand);
623 setOperationAction(ISD::FSINCOS, VT, Expand);
624 }
625
626 // Half type will be promoted by default.
627 setF16Action(MVT::f16, Promote);
628 setOperationAction(ISD::FADD, MVT::f16, Promote);
629 setOperationAction(ISD::FSUB, MVT::f16, Promote);
630 setOperationAction(ISD::FMUL, MVT::f16, Promote);
631 setOperationAction(ISD::FDIV, MVT::f16, Promote);
632 setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall);
633 setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall);
634 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
635
636 setOperationAction(ISD::STRICT_FADD, MVT::f16, Promote);
637 setOperationAction(ISD::STRICT_FSUB, MVT::f16, Promote);
638 setOperationAction(ISD::STRICT_FMUL, MVT::f16, Promote);
639 setOperationAction(ISD::STRICT_FDIV, MVT::f16, Promote);
640 setOperationAction(ISD::STRICT_FMA, MVT::f16, Promote);
641 setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Promote);
642 setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Promote);
643 setOperationAction(ISD::STRICT_FMINIMUM, MVT::f16, Promote);
644 setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote);
645 setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote);
646 setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote);
647 setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote);
648 setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote);
649 setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote);
650 setOperationAction(ISD::STRICT_FEXP, MVT::f16, Promote);
651 setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote);
652 setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote);
653 setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote);
654 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote);
655 setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote);
656 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote);
657 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote);
658 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
659 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
660 setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
661 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall);
662 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, LibCall);
663 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
664
665 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
666 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
667
668 // Lower this to MOVMSK plus an AND.
669 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
670 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
671
672 } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
673 (UseX87 || Is64Bit)) {
674 // Use SSE for f32, x87 for f64.
675 // Set up the FP register classes.
676 addRegisterClass(MVT::f32, &X86::FR32RegClass);
677 if (UseX87)
678 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
679
680 // Use ANDPS to simulate FABS.
681 setOperationAction(ISD::FABS , MVT::f32, Custom);
682
683 // Use XORP to simulate FNEG.
684 setOperationAction(ISD::FNEG , MVT::f32, Custom);
685
686 if (UseX87)
687 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
688
689 // Use ANDPS and ORPS to simulate FCOPYSIGN.
690 if (UseX87)
691 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
692 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
693
694 // We don't support sin/cos/fmod
695 setOperationAction(ISD::FSIN , MVT::f32, Expand);
696 setOperationAction(ISD::FCOS , MVT::f32, Expand);
697 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
698
699 if (UseX87) {
700 // Always expand sin/cos functions even though x87 has an instruction.
701 setOperationAction(ISD::FSIN, MVT::f64, Expand);
702 setOperationAction(ISD::FCOS, MVT::f64, Expand);
703 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
704 }
705 } else if (UseX87) {
706 // f32 and f64 in x87.
707 // Set up the FP register classes.
708 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
709 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
710
711 for (auto VT : { MVT::f32, MVT::f64 }) {
712 setOperationAction(ISD::UNDEF, VT, Expand);
713 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
714
715 // Always expand sin/cos functions even though x87 has an instruction.
716 setOperationAction(ISD::FSIN , VT, Expand);
717 setOperationAction(ISD::FCOS , VT, Expand);
718 setOperationAction(ISD::FSINCOS, VT, Expand);
719 }
720 }
721
722 // Expand FP32 immediates into loads from the stack, save special cases.
723 if (isTypeLegal(MVT::f32)) {
724 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
725 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
726 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
727 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
728 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
729 } else // SSE immediates.
730 addLegalFPImmediate(APFloat(+0.0f)); // xorps
731 }
732 // Expand FP64 immediates into loads from the stack, save special cases.
733 if (isTypeLegal(MVT::f64)) {
734 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
735 addLegalFPImmediate(APFloat(+0.0)); // FLD0
736 addLegalFPImmediate(APFloat(+1.0)); // FLD1
737 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
738 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
739 } else // SSE immediates.
740 addLegalFPImmediate(APFloat(+0.0)); // xorpd
741 }
742 // Support fp16 0 immediate.
743 if (isTypeLegal(MVT::f16))
744 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
745
746 // Handle constrained floating-point operations of scalar.
747 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
748 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
749 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
750 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
751 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
752 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
753 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
754 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
755 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
756 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
757 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
758 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
759
760 // We don't support FMA.
761 setOperationAction(ISD::FMA, MVT::f64, Expand);
762 setOperationAction(ISD::FMA, MVT::f32, Expand);
763
764 // f80 always uses X87.
765 if (UseX87) {
766 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
767 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
768 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
769 {
770 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
771 addLegalFPImmediate(TmpFlt); // FLD0
772 TmpFlt.changeSign();
773 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
774
775 bool ignored;
776 APFloat TmpFlt2(+1.0);
777 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
778 &ignored);
779 addLegalFPImmediate(TmpFlt2); // FLD1
780 TmpFlt2.changeSign();
781 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
782 }
783
784 // Always expand sin/cos functions even though x87 has an instruction.
785 setOperationAction(ISD::FSIN , MVT::f80, Expand);
786 setOperationAction(ISD::FCOS , MVT::f80, Expand);
787 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
788
789 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
790 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
791 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
792 setOperationAction(ISD::FRINT, MVT::f80, Expand);
793 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
794 setOperationAction(ISD::FMA, MVT::f80, Expand);
795 setOperationAction(ISD::LROUND, MVT::f80, Expand);
796 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
797 setOperationAction(ISD::LRINT, MVT::f80, Custom);
798 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
799
800 // Handle constrained floating-point operations of scalar.
801 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
802 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
803 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
804 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
805 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
806 if (isTypeLegal(MVT::f16)) {
807 setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
808 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
809 } else {
810 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
811 }
812 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
813 // as Custom.
814 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
815 }
816
817 // f128 uses xmm registers, but most operations require libcalls.
818 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
819 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
820 : &X86::VR128RegClass);
821
822 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
823
824 setOperationAction(ISD::FADD, MVT::f128, LibCall);
825 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
826 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
827 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
828 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
829 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
830 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
831 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
832 setOperationAction(ISD::FMA, MVT::f128, LibCall);
833 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
834
835 setOperationAction(ISD::FABS, MVT::f128, Custom);
836 setOperationAction(ISD::FNEG, MVT::f128, Custom);
837 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
838
839 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
840 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
841 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
842 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
843 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
844 // No STRICT_FSINCOS
845 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
846 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
847
848 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
849 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
850 // We need to custom handle any FP_ROUND with an f128 input, but
851 // LegalizeDAG uses the result type to know when to run a custom handler.
852 // So we have to list all legal floating point result types here.
853 if (isTypeLegal(MVT::f32)) {
854 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
855 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
856 }
857 if (isTypeLegal(MVT::f64)) {
858 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
859 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
860 }
861 if (isTypeLegal(MVT::f80)) {
862 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
863 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
864 }
865
866 setOperationAction(ISD::SETCC, MVT::f128, Custom);
867
868 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
869 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
870 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
871 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
872 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
873 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
874 }
875
876 // Always use a library call for pow.
877 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
878 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
879 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
880 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
881
882 setOperationAction(ISD::FLOG, MVT::f80, Expand);
883 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
884 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
885 setOperationAction(ISD::FEXP, MVT::f80, Expand);
886 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
887 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
888 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
889
890 // Some FP actions are always expanded for vector types.
891 for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
892 MVT::v4f32, MVT::v8f32, MVT::v16f32,
893 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
894 setOperationAction(ISD::FSIN, VT, Expand);
895 setOperationAction(ISD::FSINCOS, VT, Expand);
896 setOperationAction(ISD::FCOS, VT, Expand);
897 setOperationAction(ISD::FREM, VT, Expand);
898 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
899 setOperationAction(ISD::FPOW, VT, Expand);
900 setOperationAction(ISD::FLOG, VT, Expand);
901 setOperationAction(ISD::FLOG2, VT, Expand);
902 setOperationAction(ISD::FLOG10, VT, Expand);
903 setOperationAction(ISD::FEXP, VT, Expand);
904 setOperationAction(ISD::FEXP2, VT, Expand);
905 }
906
907 // First set operation action for all vector types to either promote
908 // (for widening) or expand (for scalarization). Then we will selectively
909 // turn on ones that can be effectively codegen'd.
910 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
911 setOperationAction(ISD::SDIV, VT, Expand);
912 setOperationAction(ISD::UDIV, VT, Expand);
913 setOperationAction(ISD::SREM, VT, Expand);
914 setOperationAction(ISD::UREM, VT, Expand);
915 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
916 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
917 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
918 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
919 setOperationAction(ISD::FMA, VT, Expand);
920 setOperationAction(ISD::FFLOOR, VT, Expand);
921 setOperationAction(ISD::FCEIL, VT, Expand);
922 setOperationAction(ISD::FTRUNC, VT, Expand);
923 setOperationAction(ISD::FRINT, VT, Expand);
924 setOperationAction(ISD::FNEARBYINT, VT, Expand);
925 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
926 setOperationAction(ISD::MULHS, VT, Expand);
927 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
928 setOperationAction(ISD::MULHU, VT, Expand);
929 setOperationAction(ISD::SDIVREM, VT, Expand);
930 setOperationAction(ISD::UDIVREM, VT, Expand);
931 setOperationAction(ISD::CTPOP, VT, Expand);
932 setOperationAction(ISD::CTTZ, VT, Expand);
933 setOperationAction(ISD::CTLZ, VT, Expand);
934 setOperationAction(ISD::ROTL, VT, Expand);
935 setOperationAction(ISD::ROTR, VT, Expand);
936 setOperationAction(ISD::BSWAP, VT, Expand);
937 setOperationAction(ISD::SETCC, VT, Expand);
938 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
939 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
940 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
941 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
942 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
943 setOperationAction(ISD::TRUNCATE, VT, Expand);
944 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
945 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
946 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
947 setOperationAction(ISD::SELECT_CC, VT, Expand);
948 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
949 setTruncStoreAction(InnerVT, VT, Expand);
950
951 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
952 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
953
954 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
955 // types, we have to deal with them whether we ask for Expansion or not.
956 // Setting Expand causes its own optimisation problems though, so leave
957 // them legal.
958 if (VT.getVectorElementType() == MVT::i1)
959 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
960
961 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
962 // split/scalarized right now.
963 if (VT.getVectorElementType() == MVT::f16 ||
964 VT.getVectorElementType() == MVT::bf16)
965 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
966 }
967 }
968
969 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
970 // with -msoft-float, disable use of MMX as well.
971 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
972 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
973 // No operations on x86mmx supported, everything uses intrinsics.
974 }
975
976 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
977 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
978 : &X86::VR128RegClass);
979
980 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
981 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
982 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
983 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
984 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
985 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
986 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
987 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
988
989 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
990 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
991
992 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
993 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
994 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
995 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
996 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
997 }
998
999 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
1000 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1001 : &X86::VR128RegClass);
1002
1003 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
1004 // registers cannot be used even for integer operations.
1005 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
1006 : &X86::VR128RegClass);
1007 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1008 : &X86::VR128RegClass);
1009 addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
1010 : &X86::VR128RegClass);
1011 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
1012 : &X86::VR128RegClass);
1013 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
1014 : &X86::VR128RegClass);
1015
1016 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
1017 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
1018 setOperationAction(ISD::SDIV, VT, Custom);
1019 setOperationAction(ISD::SREM, VT, Custom);
1020 setOperationAction(ISD::UDIV, VT, Custom);
1021 setOperationAction(ISD::UREM, VT, Custom);
1022 }
1023
1024 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
1025 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
1026 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
1027
1028 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1029 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1030 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1031 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
1032 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
1033 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
1034 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
1035 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
1036 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
1037 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1038 setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
1039 setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
1040
1041 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
1042 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
1043 setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
1044
1045 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1046 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
1047 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
1048
1049 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1050 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
1051 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
1052 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
1053 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
1054 }
1055
1056 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
1057 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
1058 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
1059 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
1060 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
1061 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
1062 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
1063 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
1064 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
1065 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
1066
1067 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1068 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1069 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1070 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1071
1072 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1073 setOperationAction(ISD::SETCC, VT, Custom);
1074 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1075 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1076 setOperationAction(ISD::CTPOP, VT, Custom);
1077 setOperationAction(ISD::ABS, VT, Custom);
1078
1079 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1080 // setcc all the way to isel and prefer SETGT in some isel patterns.
1081 setCondCodeAction(ISD::SETLT, VT, Custom);
1082 setCondCodeAction(ISD::SETLE, VT, Custom);
1083 }
1084
1085 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1086 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1087 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1088 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1089 setOperationAction(ISD::VSELECT, VT, Custom);
1090 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1091 }
1092
1093 for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
1094 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1095 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1096 setOperationAction(ISD::VSELECT, VT, Custom);
1097
1098 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1099 continue;
1100
1101 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1102 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1103 }
1104 setF16Action(MVT::v8f16, Expand);
1105 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
1106 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
1107 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
1108 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
1109
1110 // Custom lower v2i64 and v2f64 selects, as well as the other 128-bit vector types (v4i32, v8i16, v8f16, v16i8).
1111 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1112 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1113 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
1114 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
1115 setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
1116 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
1117
1118 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
1119 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
1120 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1121 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1122 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
1123 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1124
1125 // Custom legalize these to avoid over promotion or custom promotion.
1126 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1127 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1128 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1129 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1130 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1131 }
1132
1133 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1134 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
1135 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1136 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1137
1138 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1139 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1140
1141 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1142 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1143
1144 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1145 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1146 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1147 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1148 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1149
1150 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1151 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1152 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1153 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1154
1155 // We want to legalize this to an f64 load rather than an i64 load on
1156 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1157 // store.
1158 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1159 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1160 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1161 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1162 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1163 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1164
1165 // Add 32-bit vector stores to help vectorization opportunities.
1166 setOperationAction(ISD::STORE, MVT::v2i16, Custom);
1167 setOperationAction(ISD::STORE, MVT::v4i8, Custom);
1168
1169 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1170 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1171 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1172 if (!Subtarget.hasAVX512())
1173 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1174
1175 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1176 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1177 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1178
1179 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1180
1181 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1182 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1183 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1184 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1185 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1186 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1187
1188 // In the customized shift lowering, the legal v4i32/v2i64 cases
1189 // in AVX2 will be recognized.
1190 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1191 setOperationAction(ISD::SRL, VT, Custom);
1192 setOperationAction(ISD::SHL, VT, Custom);
1193 setOperationAction(ISD::SRA, VT, Custom);
1194 if (VT == MVT::v2i64) continue;
1195 setOperationAction(ISD::ROTL, VT, Custom);
1196 setOperationAction(ISD::ROTR, VT, Custom);
1197 setOperationAction(ISD::FSHL, VT, Custom);
1198 setOperationAction(ISD::FSHR, VT, Custom);
1199 }
1200
1201 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1202 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1203 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1204 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1205 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1206 }
1207
1208 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1209 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1210 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1211 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1212 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1213 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1214 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1215 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1216 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1217
1218 // These might be better off as horizontal vector ops.
1219 setOperationAction(ISD::ADD, MVT::i16, Custom);
1220 setOperationAction(ISD::ADD, MVT::i32, Custom);
1221 setOperationAction(ISD::SUB, MVT::i16, Custom);
1222 setOperationAction(ISD::SUB, MVT::i32, Custom);
1223 }
1224
1225 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1226 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1227 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1228 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1229 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1230 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1231 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1232 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1233 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1234 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1235 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1236 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1237 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1238 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1239
1240 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1241 }
1242
1243 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1244 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1245 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1246 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1247 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1248 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1249 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1250 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1251
1252 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1253 setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
1254 setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
1255
1256 // FIXME: Do we need to handle scalar-to-vector here?
1257 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1258 setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
1259
1260 // We directly match byte blends in the backend as they match the VSELECT
1261 // condition form.
1262 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1263
1264 // SSE41 brings specific instructions for doing vector sign extend even in
1265 // cases where we don't have SRA.
1266 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1267 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1268 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1269 }
1270
1271 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1272 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1273 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1274 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1275 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1276 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1277 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1278 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1279 }
1280
1281 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1282 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we
1283 // can do the pre and post work in the vector domain.
1284 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1285 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1286 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1287 // so that DAG combine doesn't try to turn it into uint_to_fp.
1288 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1289 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1290 }
1291 }
1292
1293 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1294 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1295 }
1296
1297 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1298 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1299 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1300 setOperationAction(ISD::ROTL, VT, Custom);
1301 setOperationAction(ISD::ROTR, VT, Custom);
1302 }
1303
1304 // XOP can efficiently perform BITREVERSE with VPPERM.
1305 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1306 setOperationAction(ISD::BITREVERSE, VT, Custom);
1307
1308 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1309 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1310 setOperationAction(ISD::BITREVERSE, VT, Custom);
1311 }
1312
1313 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1314 bool HasInt256 = Subtarget.hasInt256();
1315
1316 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1317 : &X86::VR256RegClass);
1318 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1319 : &X86::VR256RegClass);
1320 addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1321 : &X86::VR256RegClass);
1322 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1323 : &X86::VR256RegClass);
1324 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1325 : &X86::VR256RegClass);
1326 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1327 : &X86::VR256RegClass);
1328 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1329 : &X86::VR256RegClass);
1330
1331 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1332 setOperationAction(ISD::FFLOOR, VT, Legal);
1333 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1334 setOperationAction(ISD::FCEIL, VT, Legal);
1335 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1336 setOperationAction(ISD::FTRUNC, VT, Legal);
1337 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1338 setOperationAction(ISD::FRINT, VT, Legal);
1339 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1340 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1341 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1342 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1343 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1344
1345 setOperationAction(ISD::FROUND, VT, Custom);
1346
1347 setOperationAction(ISD::FNEG, VT, Custom);
1348 setOperationAction(ISD::FABS, VT, Custom);
1349 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1350 }
1351
1352 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1353 // even though v8i16 is a legal type.
1354 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1355 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1356 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1357 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1358 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
1359 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
1360 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
1361
1362 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
1363 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
1364 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
1365 setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
1366 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
1367 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
1368
1369 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1370 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1371 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1372 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1373 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1374 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1375 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1376 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1377 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1378 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1379 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1380
1381 if (!Subtarget.hasAVX512())
1382 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1383
1384 // In the customized shift lowering, the legal v8i32/v4i64 cases
1385 // in AVX2 will be recognized.
1386 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1387 setOperationAction(ISD::SRL, VT, Custom);
1388 setOperationAction(ISD::SHL, VT, Custom);
1389 setOperationAction(ISD::SRA, VT, Custom);
1390 if (VT == MVT::v4i64) continue;
1391 setOperationAction(ISD::ROTL, VT, Custom);
1392 setOperationAction(ISD::ROTR, VT, Custom);
1393 setOperationAction(ISD::FSHL, VT, Custom);
1394 setOperationAction(ISD::FSHR, VT, Custom);
1395 }
1396
1397 // These types need custom splitting if their input is a 128-bit vector.
1398 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1399 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1400 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1401 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1402
1403 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1404 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1405 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1406 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1407 setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
1408 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1409 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1410
1411 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1412 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1413 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1414 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1415 }
1416
1417 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1418 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1419 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1420 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1421
1422 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1423 setOperationAction(ISD::SETCC, VT, Custom);
1424 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1425 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1426 setOperationAction(ISD::CTPOP, VT, Custom);
1427 setOperationAction(ISD::CTLZ, VT, Custom);
1428
1429 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1430 // setcc all the way to isel and prefer SETGT in some isel patterns.
1431 setCondCodeAction(ISD::SETLT, VT, Custom);
1432 setCondCodeAction(ISD::SETLE, VT, Custom);
1433 }
1434
1435 if (Subtarget.hasAnyFMA()) {
1436 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1437 MVT::v2f64, MVT::v4f64 }) {
1438 setOperationAction(ISD::FMA, VT, Legal);
1439 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1440 }
1441 }
1442
1443 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1444 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1445 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1446 }
1447
1448 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1449 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1450 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1451 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1452
1453 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1454 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1455 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1456 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1457 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1458 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1459 setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
1460 setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
1461
1462 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1463 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1464
1465 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1466 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1467 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1468 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1469 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1470
1471 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1472 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1473 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1474 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1475 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1476 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1477 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1478 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1479 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1480 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1481 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1482 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1483
1484 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1485 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1486 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1487 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1488 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1489 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1490 }
1491
1492 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1493 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1494 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1495 }
1496
1497 if (HasInt256) {
1498 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1499 // when we have a 256bit-wide blend with immediate.
1500 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1501 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1502
1503 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1504 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1505 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1506 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1507 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1508 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1509 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1510 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1511 }
1512 }
1513
1514 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1515 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1516 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1517 setOperationAction(ISD::MSTORE, VT, Legal);
1518 }
1519
1520 // Extract subvector is special because the value type
1521 // (result) is 128-bit but the source is 256-bit wide.
1522 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1523 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1524 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1525 }
1526
1527 // Custom lower several nodes for 256-bit types.
1528 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1529 MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
1530 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1531 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1532 setOperationAction(ISD::VSELECT, VT, Custom);
1533 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1534 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1535 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1536 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1537 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1538 setOperationAction(ISD::STORE, VT, Custom);
1539 }
1540 setF16Action(MVT::v16f16, Expand);
1541 setOperationAction(ISD::FADD, MVT::v16f16, Expand);
1542 setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
1543 setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
1544 setOperationAction(ISD::FDIV, MVT::v16f16, Expand);
1545
1546 if (HasInt256) {
1547 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1548
1549 // Custom legalize 2x32 to get a little better code.
1550 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1551 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1552
1553 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1554 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1555 setOperationAction(ISD::MGATHER, VT, Custom);
1556 }
1557 }
1558
1559 if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
1560 Subtarget.hasF16C()) {
1561 for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
1562 setOperationAction(ISD::FP_ROUND, VT, Custom);
1563 setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1564 }
1565 for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) {
1566 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1567 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
1568 }
1569 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1570 setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
1571 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1572 }
1573
1574 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1575 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
1576 }
1577
1578 // This block controls legalization of the mask vector sizes that are
1579 // available with AVX512. 512-bit vectors are in a separate block controlled
1580 // by useAVX512Regs.
1581 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1582 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1583 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1584 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1585 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1586 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1587
1588 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1589 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1590 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1591
1592 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1593 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1594 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1595 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1596 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1597 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1598 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1599 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1600 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1601 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1602 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1603 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1604
1605 // There is no byte sized k-register load or store without AVX512DQ.
1606 if (!Subtarget.hasDQI()) {
1607 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1608 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1609 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1610 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1611
1612 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1613 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1614 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1615 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1616 }
1617
1618 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1619 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1620 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1621 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1622 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1623 }
1624
1625 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1626 setOperationAction(ISD::VSELECT, VT, Expand);
1627
1628 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1629 setOperationAction(ISD::SETCC, VT, Custom);
1630 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1631 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1632 setOperationAction(ISD::SELECT, VT, Custom);
1633 setOperationAction(ISD::TRUNCATE, VT, Custom);
1634
1635 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1636 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1637 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1638 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1639 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1640 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1641 }
1642
1643 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1644 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1645 }
1646
1647 // This block controls legalization for 512-bit operations with 32/64 bit
1648 // elements. 512-bits can be disabled based on prefer-vector-width and
1649 // required-vector-width function attributes.
1650 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1651 bool HasBWI = Subtarget.hasBWI();
1652
1653 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1654 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1655 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1656 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1657 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1658 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1659 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1660
1661 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1662 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1663 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1664 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1665 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1666 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1667 if (HasBWI)
1668 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1669 }
1670
1671 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1672 setOperationAction(ISD::FNEG, VT, Custom);
1673 setOperationAction(ISD::FABS, VT, Custom);
1674 setOperationAction(ISD::FMA, VT, Legal);
1675 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1676 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1677 }
1678
1679 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1680 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1681 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1682 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1683 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1684 }
1685 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom);
1686 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom);
1687 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom);
1688 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom);
1689 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
1690 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
1691 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
1692 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom);
1693 setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
1694 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
1695
1696 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1697 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1698 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1699 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1700 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1701 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1702 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1703 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1704 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1705 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1706 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1707
1708 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1709 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1710 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1711 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1712 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1713 if (HasBWI)
1714 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1715
1716 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1717 // to 512-bit rather than use the AVX2 instructions so that we can use
1718 // k-masks.
1719 if (!Subtarget.hasVLX()) {
1720 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1721 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1722 setOperationAction(ISD::MLOAD, VT, Custom);
1723 setOperationAction(ISD::MSTORE, VT, Custom);
1724 }
1725 }
1726
1727 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1728 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1729 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1730 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1731 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1732 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1733 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1734 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1735 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1736 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1737 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1738 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1739 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1740
1741 if (HasBWI) {
1742 // Extends from v64i1 masks to 512-bit vectors.
1743 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1744 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1745 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1746 }
1747
1748 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1749 setOperationAction(ISD::FFLOOR, VT, Legal);
1750 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1751 setOperationAction(ISD::FCEIL, VT, Legal);
1752 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1753 setOperationAction(ISD::FTRUNC, VT, Legal);
1754 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1755 setOperationAction(ISD::FRINT, VT, Legal);
1756 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1757 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1758 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1759 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1760 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1761
1762 setOperationAction(ISD::FROUND, VT, Custom);
1763 }
1764
1765 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1766 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1767 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1768 }
1769
1770 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1771 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1772 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1773 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1774
1775 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1776 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1777 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1778 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1779
1780 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1781 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1782 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1783 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1784 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1785 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1786 setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
1787 setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
1788
1789 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1790 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1791
1792 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1793
1794 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1795 setOperationAction(ISD::SRL, VT, Custom);
1796 setOperationAction(ISD::SHL, VT, Custom);
1797 setOperationAction(ISD::SRA, VT, Custom);
1798 setOperationAction(ISD::ROTL, VT, Custom);
1799 setOperationAction(ISD::ROTR, VT, Custom);
1800 setOperationAction(ISD::SETCC, VT, Custom);
1801
1802 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1803 // setcc all the way to isel and prefer SETGT in some isel patterns.
1804 setCondCodeAction(ISD::SETLT, VT, Custom);
1805 setCondCodeAction(ISD::SETLE, VT, Custom);
1806 }
1807 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1808 setOperationAction(ISD::SMAX, VT, Legal);
1809 setOperationAction(ISD::UMAX, VT, Legal);
1810 setOperationAction(ISD::SMIN, VT, Legal);
1811 setOperationAction(ISD::UMIN, VT, Legal);
1812 setOperationAction(ISD::ABS, VT, Legal);
1813 setOperationAction(ISD::CTPOP, VT, Custom);
1814 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1815 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1816 }
1817
1818 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1819 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1820 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1821 setOperationAction(ISD::CTLZ, VT, Custom);
1822 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1823 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1824 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1825 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1826 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1827 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1828 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1829 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1830 }
1831
1832 setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
1833 setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
1834 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1835 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1836 setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
1837 setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
1838
1839 if (Subtarget.hasDQI()) {
1840 for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
1841 ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1842 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
1843 setOperationAction(Opc, MVT::v8i64, Custom);
1844 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1845 }
1846
1847 if (Subtarget.hasCDI()) {
1848 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1849 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1850 setOperationAction(ISD::CTLZ, VT, Legal);
1851 }
1852 } // Subtarget.hasCDI()
1853
1854 if (Subtarget.hasVPOPCNTDQ()) {
1855 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1856 setOperationAction(ISD::CTPOP, VT, Legal);
1857 }
1858
1859 // Extract subvector is special because the value type
1860 // (result) is 256-bit but the source is 512-bit wide.
1861 // 128-bit was made Legal under AVX1.
1862 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1863 MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1864 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1865
1866 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1867 MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
1868 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1869 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1870 setOperationAction(ISD::SELECT, VT, Custom);
1871 setOperationAction(ISD::VSELECT, VT, Custom);
1872 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1873 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1874 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1875 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1876 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1877 }
1878 setF16Action(MVT::v32f16, Expand);
1879 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom);
1880 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom);
1881 setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
1882 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
1883 for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
1884 setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
1885 setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
1886 }
1887
1888 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1889 setOperationAction(ISD::MLOAD, VT, Legal);
1890 setOperationAction(ISD::MSTORE, VT, Legal);
1891 setOperationAction(ISD::MGATHER, VT, Custom);
1892 setOperationAction(ISD::MSCATTER, VT, Custom);
1893 }
1894 if (HasBWI) {
1895 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1896 setOperationAction(ISD::MLOAD, VT, Legal);
1897 setOperationAction(ISD::MSTORE, VT, Legal);
1898 }
1899 } else {
1900 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1901 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1902 }
1903
1904 if (Subtarget.hasVBMI2()) {
1905 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1906 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1907 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1908 setOperationAction(ISD::FSHL, VT, Custom);
1909 setOperationAction(ISD::FSHR, VT, Custom);
1910 }
1911
1912 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1913 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1914 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1915 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1916 }
1917 }// useAVX512Regs
1918
1919 // This block controls legalization for operations that don't have
1920 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1921 // narrower widths.
1922 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1923 // These operations are handled on non-VLX by artificially widening in
1924 // isel patterns.
1925
1926 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
1927 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
1928 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1929
1930 if (Subtarget.hasDQI()) {
1931 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1932 // v2f32 UINT_TO_FP is already custom under SSE2.
1933 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1935, __extension__
__PRETTY_FUNCTION__))
1934 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1935, __extension__
__PRETTY_FUNCTION__))
1935 "Unexpected operation action!")(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1935, __extension__
__PRETTY_FUNCTION__))
;
1936 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1937 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1938 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1939 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1940 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1941 }
1942
1943 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1944 setOperationAction(ISD::SMAX, VT, Legal);
1945 setOperationAction(ISD::UMAX, VT, Legal);
1946 setOperationAction(ISD::SMIN, VT, Legal);
1947 setOperationAction(ISD::UMIN, VT, Legal);
1948 setOperationAction(ISD::ABS, VT, Legal);
1949 }
1950
1951 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1952 setOperationAction(ISD::ROTL, VT, Custom);
1953 setOperationAction(ISD::ROTR, VT, Custom);
1954 }
1955
1956 // Custom legalize 2x32 to get a little better code.
1957 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1958 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1959
1960 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1961 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1962 setOperationAction(ISD::MSCATTER, VT, Custom);
1963
1964 if (Subtarget.hasDQI()) {
1965 for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
1966 ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1967 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) {
1968 setOperationAction(Opc, MVT::v2i64, Custom);
1969 setOperationAction(Opc, MVT::v4i64, Custom);
1970 }
1971 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1972 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1973 }
1974
1975 if (Subtarget.hasCDI()) {
1976 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1977 setOperationAction(ISD::CTLZ, VT, Legal);
1978 }
1979 } // Subtarget.hasCDI()
1980
1981 if (Subtarget.hasVPOPCNTDQ()) {
1982 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1983 setOperationAction(ISD::CTPOP, VT, Legal);
1984 }
1985 }
1986
1987 // This block controls legalization of v32i1/v64i1 which are available with
1988 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1989 // useBWIRegs.
1990 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1991 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1992 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1993
1994 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1995 setOperationAction(ISD::VSELECT, VT, Expand);
1996 setOperationAction(ISD::TRUNCATE, VT, Custom);
1997 setOperationAction(ISD::SETCC, VT, Custom);
1998 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1999 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
2000 setOperationAction(ISD::SELECT, VT, Custom);
2001 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2002 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2003 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
2004 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
2005 }
2006
2007 for (auto VT : { MVT::v16i1, MVT::v32i1 })
2008 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
2009
2010 // Extends from v32i1 masks to 256-bit vectors.
2011 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
2012 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
2013 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
2014
2015 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
2016 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
2017 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
2018 }
2019
2020 // These operations are handled on non-VLX by artificially widening in
2021 // isel patterns.
2022 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
2023
2024 if (Subtarget.hasBITALG()) {
2025 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
2026 setOperationAction(ISD::CTPOP, VT, Legal);
2027 }
2028 }
2029
2030 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
2031 auto setGroup = [&] (MVT VT) {
2032 setOperationAction(ISD::FADD, VT, Legal);
2033 setOperationAction(ISD::STRICT_FADD, VT, Legal);
2034 setOperationAction(ISD::FSUB, VT, Legal);
2035 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
2036 setOperationAction(ISD::FMUL, VT, Legal);
2037 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
2038 setOperationAction(ISD::FDIV, VT, Legal);
2039 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
2040 setOperationAction(ISD::FSQRT, VT, Legal);
2041 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
2042
2043 setOperationAction(ISD::FFLOOR, VT, Legal);
2044 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
2045 setOperationAction(ISD::FCEIL, VT, Legal);
2046 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
2047 setOperationAction(ISD::FTRUNC, VT, Legal);
2048 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
2049 setOperationAction(ISD::FRINT, VT, Legal);
2050 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
2051 setOperationAction(ISD::FNEARBYINT, VT, Legal);
2052 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
2053
2054 setOperationAction(ISD::LOAD, VT, Legal);
2055 setOperationAction(ISD::STORE, VT, Legal);
2056
2057 setOperationAction(ISD::FMA, VT, Legal);
2058 setOperationAction(ISD::STRICT_FMA, VT, Legal);
2059 setOperationAction(ISD::VSELECT, VT, Legal);
2060 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2061 setOperationAction(ISD::SELECT, VT, Custom);
2062
2063 setOperationAction(ISD::FNEG, VT, Custom);
2064 setOperationAction(ISD::FABS, VT, Custom);
2065 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
2066 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
2067 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2068 };
2069
2070 // AVX512_FP16 scalar operations
2071 setGroup(MVT::f16);
2072 setOperationAction(ISD::FREM, MVT::f16, Promote);
2073 setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
2074 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
2075 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
2076 setOperationAction(ISD::SETCC, MVT::f16, Custom);
2077 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
2078 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
2079 setOperationAction(ISD::FROUND, MVT::f16, Custom);
2080 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
2081 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
2082 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
2083 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
2084 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
2085 setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
2086 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
2087
2088 setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
2089 setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
2090
2091 if (Subtarget.useAVX512Regs()) {
2092 setGroup(MVT::v32f16);
2093 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
2094 setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
2095 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
2096 setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
2097 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
2098 setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
2099 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
2100 setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
2101 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
2102 setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal);
2103 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
2104 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
2105
2106 setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
2107 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
2108 setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
2109 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
2110 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
2111 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
2112 MVT::v32i16);
2113 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
2114 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
2115 MVT::v32i16);
2116 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
2117 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
2118 MVT::v32i16);
2119 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
2120 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
2121 MVT::v32i16);
2122
2123 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
2124 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
2125 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
2126
2127 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
2128 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2129
2130 setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
2131 setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
2132 }
2133
2134 if (Subtarget.hasVLX()) {
2135 setGroup(MVT::v8f16);
2136 setGroup(MVT::v16f16);
2137
2138 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
2139 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
2140 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
2141 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
2142 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
2143 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
2144 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
2145 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
2146 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
2147 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
2148
2149 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
2150 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
2151 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
2152 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
2153 setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
2154 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
2155 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
2156 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
2157 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
2158 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
2159
2160 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2161 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
2162 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
2163
2164 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
2165 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
2166 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
2167
2168 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2169 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2170 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2171 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2172
2173 // Need to custom widen these to prevent scalarization.
2174 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2175 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2176 }
2177 }
2178
2179 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2180 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2181 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2182 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2183 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2184 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2185
2186 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2187 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2188 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2189 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2190 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2191
2192 if (Subtarget.hasBWI()) {
2193 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2194 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2195 }
2196
2197 if (Subtarget.hasFP16()) {
2198 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2199 setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
2200 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
2201 setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
2202 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
2203 setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
2204 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
2205 setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
2206 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
2207 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2208 setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
2209 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
2210 setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
2211 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
2212 setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
2213 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
2214 setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
2215 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
2216 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2217 setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
2218 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
2219 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
2220 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
2221 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2222 setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
2223 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
2224 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
2225 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
2226 }
2227
2228 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
2229 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
2230 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
2231 }
2232
2233 if (Subtarget.hasAMXTILE()) {
2234 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2235 }
2236
2237 // We want to custom lower some of our intrinsics.
2238 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
2239 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
2240 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
2241 if (!Subtarget.is64Bit()) {
2242 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
2243 }
2244
2245 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2246 // handle type legalization for these operations here.
2247 //
2248 // FIXME: We really should do custom legalization for addition and
2249 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2250 // than generic legalization for 64-bit multiplication-with-overflow, though.
2251 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2252 if (VT == MVT::i64 && !Subtarget.is64Bit())
2253 continue;
2254 // Add/Sub/Mul with overflow operations are custom lowered.
2255 setOperationAction(ISD::SADDO, VT, Custom);
2256 setOperationAction(ISD::UADDO, VT, Custom);
2257 setOperationAction(ISD::SSUBO, VT, Custom);
2258 setOperationAction(ISD::USUBO, VT, Custom);
2259 setOperationAction(ISD::SMULO, VT, Custom);
2260 setOperationAction(ISD::UMULO, VT, Custom);
2261
2262 // Support carry in as value rather than glue.
2263 setOperationAction(ISD::ADDCARRY, VT, Custom);
2264 setOperationAction(ISD::SUBCARRY, VT, Custom);
2265 setOperationAction(ISD::SETCCCARRY, VT, Custom);
2266 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
2267 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
2268 }
2269
2270 if (!Subtarget.is64Bit()) {
2271 // These libcalls are not available in 32-bit.
2272 setLibcallName(RTLIB::SHL_I128, nullptr);
2273 setLibcallName(RTLIB::SRL_I128, nullptr);
2274 setLibcallName(RTLIB::SRA_I128, nullptr);
2275 setLibcallName(RTLIB::MUL_I128, nullptr);
2276 // The MULO libcall is not part of libgcc, only compiler-rt.
2277 setLibcallName(RTLIB::MULO_I64, nullptr);
2278 }
2279 // The MULO libcall is not part of libgcc, only compiler-rt.
2280 setLibcallName(RTLIB::MULO_I128, nullptr);
2281
2282 // Combine sin / cos into _sincos_stret if it is available.
2283 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2284 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2285 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
2286 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
2287 }
2288
2289 if (Subtarget.isTargetWin64()) {
2290 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2291 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2292 setOperationAction(ISD::SREM, MVT::i128, Custom);
2293 setOperationAction(ISD::UREM, MVT::i128, Custom);
2294 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
2295 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
2296 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
2297 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
2298 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
2299 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
2300 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
2301 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
2302 }
2303
2304 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2305 // is. We should promote the value to 64-bits to solve this.
2306 // This is what the CRT headers do - `fmodf` is an inline header
2307 // function casting to f64 and calling `fmod`.
2308 if (Subtarget.is32Bit() &&
2309 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2310 for (ISD::NodeType Op :
2311 {ISD::FCEIL, ISD::STRICT_FCEIL,
2312 ISD::FCOS, ISD::STRICT_FCOS,
2313 ISD::FEXP, ISD::STRICT_FEXP,
2314 ISD::FFLOOR, ISD::STRICT_FFLOOR,
2315 ISD::FREM, ISD::STRICT_FREM,
2316 ISD::FLOG, ISD::STRICT_FLOG,
2317 ISD::FLOG10, ISD::STRICT_FLOG10,
2318 ISD::FPOW, ISD::STRICT_FPOW,
2319 ISD::FSIN, ISD::STRICT_FSIN})
2320 if (isOperationExpand(Op, MVT::f32))
2321 setOperationAction(Op, MVT::f32, Promote);
2322
2323 // We have target-specific dag combine patterns for the following nodes:
2324 setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
2325 ISD::SCALAR_TO_VECTOR,
2326 ISD::INSERT_VECTOR_ELT,
2327 ISD::EXTRACT_VECTOR_ELT,
2328 ISD::CONCAT_VECTORS,
2329 ISD::INSERT_SUBVECTOR,
2330 ISD::EXTRACT_SUBVECTOR,
2331 ISD::BITCAST,
2332 ISD::VSELECT,
2333 ISD::SELECT,
2334 ISD::SHL,
2335 ISD::SRA,
2336 ISD::SRL,
2337 ISD::OR,
2338 ISD::AND,
2339 ISD::ADD,
2340 ISD::FADD,
2341 ISD::FSUB,
2342 ISD::FNEG,
2343 ISD::FMA,
2344 ISD::STRICT_FMA,
2345 ISD::FMINNUM,
2346 ISD::FMAXNUM,
2347 ISD::SUB,
2348 ISD::LOAD,
2349 ISD::MLOAD,
2350 ISD::STORE,
2351 ISD::MSTORE,
2352 ISD::TRUNCATE,
2353 ISD::ZERO_EXTEND,
2354 ISD::ANY_EXTEND,
2355 ISD::SIGN_EXTEND,
2356 ISD::SIGN_EXTEND_INREG,
2357 ISD::ANY_EXTEND_VECTOR_INREG,
2358 ISD::SIGN_EXTEND_VECTOR_INREG,
2359 ISD::ZERO_EXTEND_VECTOR_INREG,
2360 ISD::SINT_TO_FP,
2361 ISD::UINT_TO_FP,
2362 ISD::STRICT_SINT_TO_FP,
2363 ISD::STRICT_UINT_TO_FP,
2364 ISD::SETCC,
2365 ISD::MUL,
2366 ISD::XOR,
2367 ISD::MSCATTER,
2368 ISD::MGATHER,
2369 ISD::FP16_TO_FP,
2370 ISD::FP_EXTEND,
2371 ISD::STRICT_FP_EXTEND,
2372 ISD::FP_ROUND,
2373 ISD::STRICT_FP_ROUND});
2374
2375 computeRegisterProperties(Subtarget.getRegisterInfo());
2376
2377 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2378 MaxStoresPerMemsetOptSize = 8;
2379 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2380 MaxStoresPerMemcpyOptSize = 4;
2381 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2382 MaxStoresPerMemmoveOptSize = 4;
2383
2384 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2385 // that needs to be benchmarked and balanced with the potential use of vector
2386 // load/store types (PR33329, PR33914).
2387 MaxLoadsPerMemcmp = 2;
2388 MaxLoadsPerMemcmpOptSize = 2;
2389
2390 // Default loop alignment, which can be overridden by -align-loops.
2391 setPrefLoopAlignment(Align(16));
2392
2393 // An out-of-order CPU can speculatively execute past a predictable branch,
2394 // but a conditional move could be stalled by an expensive earlier operation.
2395 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2396 EnableExtLdPromotion = true;
2397 setPrefFunctionAlignment(Align(16));
2398
2399 verifyIntrinsicTables();
2400
2401 // Default to having -disable-strictnode-mutation on
2402 IsStrictFPEnabled = true;
2403}
2404
2405// This has so far only been implemented for 64-bit MachO.
2406bool X86TargetLowering::useLoadStackGuardNode() const {
2407 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2408}
2409
2410bool X86TargetLowering::useStackGuardXorFP() const {
2411 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2412 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2413}
2414
2415SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2416 const SDLoc &DL) const {
2417 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2418 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2419 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2420 return SDValue(Node, 0);
2421}
2422
2423TargetLoweringBase::LegalizeTypeAction
2424X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2425 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2426 !Subtarget.hasBWI())
2427 return TypeSplitVector;
2428
2429 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2430 !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)
2431 return TypeSplitVector;
2432
2433 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2434 VT.getVectorElementType() != MVT::i1)
2435 return TypeWidenVector;
2436
2437 return TargetLoweringBase::getPreferredVectorAction(VT);
2438}
2439
2440static std::pair<MVT, unsigned>
2441handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2442 const X86Subtarget &Subtarget) {
2443 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2444 // convention is one that uses k registers.
2445 if (NumElts == 2)
2446 return {MVT::v2i64, 1};
2447 if (NumElts == 4)
2448 return {MVT::v4i32, 1};
2449 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2450 CC != CallingConv::Intel_OCL_BI)
2451 return {MVT::v8i16, 1};
2452 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2453 CC != CallingConv::Intel_OCL_BI)
2454 return {MVT::v16i8, 1};
2455 // v32i1 passes in ymm unless we have BWI and the calling convention is
2456 // regcall.
2457 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2458 return {MVT::v32i8, 1};
2459 // Split v64i1 vectors if we don't have v64i8 available.
2460 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2461 if (Subtarget.useAVX512Regs())
2462 return {MVT::v64i8, 1};
2463 return {MVT::v32i8, 2};
2464 }
2465
2466 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2467 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2468 NumElts > 64)
2469 return {MVT::i8, NumElts};
2470
2471 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2472}
2473
2474MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2475 CallingConv::ID CC,
2476 EVT VT) const {
2477 if (VT.isVector()) {
2478 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
2479 unsigned NumElts = VT.getVectorNumElements();
2480
2481 MVT RegisterVT;
2482 unsigned NumRegisters;
2483 std::tie(RegisterVT, NumRegisters) =
2484 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2485 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2486 return RegisterVT;
2487 }
2488
2489 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
2490 return MVT::v8f16;
2491 }
2492
2493 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
2494 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
2495 !Subtarget.hasX87())
2496 return MVT::i32;
2497
2498 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
2499 return getRegisterTypeForCallingConv(Context, CC,
2500 VT.changeVectorElementTypeToInteger());
2501
2502 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2503}
2504
2505unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2506 CallingConv::ID CC,
2507 EVT VT) const {
2508 if (VT.isVector()) {
2509 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
2510 unsigned NumElts = VT.getVectorNumElements();
2511
2512 MVT RegisterVT;
2513 unsigned NumRegisters;
2514 std::tie(RegisterVT, NumRegisters) =
2515 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2516 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2517 return NumRegisters;
2518 }
2519
2520 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
2521 return 1;
2522 }
2523
2524 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
2525 // x87 is disabled.
2526 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
2527 if (VT == MVT::f64)
2528 return 2;
2529 if (VT == MVT::f80)
2530 return 3;
2531 }
2532
2533 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
2534 return getNumRegistersForCallingConv(Context, CC,
2535 VT.changeVectorElementTypeToInteger());
2536
2537 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2538}
2539
2540unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2541 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2542 unsigned &NumIntermediates, MVT &RegisterVT) const {
2543 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2544 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2545 Subtarget.hasAVX512() &&
2546 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2547 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2548 VT.getVectorNumElements() > 64)) {
2549 RegisterVT = MVT::i8;
2550 IntermediateVT = MVT::i1;
2551 NumIntermediates = VT.getVectorNumElements();
2552 return NumIntermediates;
2553 }
2554
2555 // Split v64i1 vectors if we don't have v64i8 available.
2556 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2557 CC != CallingConv::X86_RegCall) {
2558 RegisterVT = MVT::v32i8;
2559 IntermediateVT = MVT::v32i1;
2560 NumIntermediates = 2;
2561 return 2;
2562 }
2563
2564 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2565 NumIntermediates, RegisterVT);
2566}
2567
2568EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2569 LLVMContext& Context,
2570 EVT VT) const {
2571 if (!VT.isVector())
2572 return MVT::i8;
2573
2574 if (Subtarget.hasAVX512()) {
2575 // Figure out what this type will be legalized to.
2576 EVT LegalVT = VT;
2577 while (getTypeAction(Context, LegalVT) != TypeLegal)
2578 LegalVT = getTypeToTransformTo(Context, LegalVT);
2579
2580 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2581 if (LegalVT.getSimpleVT().is512BitVector())
2582 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2583
2584 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2585 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2586 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2587 // vXi16/vXi8.
2588 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2589 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2590 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2591 }
2592 }
2593
2594 return VT.changeVectorElementTypeToInteger();
2595}
2596
2597/// Helper for getByValTypeAlignment to determine
2598/// the desired ByVal argument alignment.
2599static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2600 if (MaxAlign == 16)
2601 return;
2602 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2603 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2604 MaxAlign = Align(16);
2605 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2606 Align EltAlign;
2607 getMaxByValAlign(ATy->getElementType(), EltAlign);
2608 if (EltAlign > MaxAlign)
2609 MaxAlign = EltAlign;
2610 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2611 for (auto *EltTy : STy->elements()) {
2612 Align EltAlign;
2613 getMaxByValAlign(EltTy, EltAlign);
2614 if (EltAlign > MaxAlign)
2615 MaxAlign = EltAlign;
2616 if (MaxAlign == 16)
2617 break;
2618 }
2619 }
2620}
2621
2622/// Return the desired alignment for ByVal aggregate
2623/// function arguments in the caller parameter area. For X86, aggregates
2624/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2625/// are at 4-byte boundaries.
2626uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
2627 const DataLayout &DL) const {
2628 if (Subtarget.is64Bit()) {
2629 // Max of 8 and alignment of type.
2630 Align TyAlign = DL.getABITypeAlign(Ty);
2631 if (TyAlign > 8)
2632 return TyAlign.value();
2633 return 8;
2634 }
2635
2636 Align Alignment(4);
2637 if (Subtarget.hasSSE1())
2638 getMaxByValAlign(Ty, Alignment);
2639 return Alignment.value();
2640}
2641
2642/// It returns EVT::Other if the type should be determined using generic
2643/// target-independent logic.
2644/// For vector ops we check that the overall size isn't larger than our
2645/// preferred vector width.
2646EVT X86TargetLowering::getOptimalMemOpType(
2647 const MemOp &Op, const AttributeList &FuncAttributes) const {
2648 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
2649 if (Op.size() >= 16 &&
2650 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2651 // FIXME: Check if unaligned 64-byte accesses are slow.
2652 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2653 (Subtarget.getPreferVectorWidth() >= 512)) {
2654 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2655 }
2656 // FIXME: Check if unaligned 32-byte accesses are slow.
2657 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2658 (Subtarget.getPreferVectorWidth() >= 256)) {
2659 // Although this isn't a well-supported type for AVX1, we'll let
2660 // legalization and shuffle lowering produce the optimal codegen. If we
2661 // choose an optimal type with a vector element larger than a byte,
2662 // getMemsetStores() may create an intermediate splat (using an integer
2663 // multiply) before we splat as a vector.
2664 return MVT::v32i8;
2665 }
2666 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2667 return MVT::v16i8;
2668 // TODO: Can SSE1 handle a byte vector?
2669 // If we have SSE1 registers we should be able to use them.
2670 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2671 (Subtarget.getPreferVectorWidth() >= 128))
2672 return MVT::v4f32;
2673 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2674 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2675 // Do not use f64 to lower memcpy if source is string constant. It's
2676 // better to use i32 to avoid the loads.
2677 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2678 // The gymnastics of splatting a byte value into an XMM register and then
2679 // only using 8-byte stores (because this is a CPU with slow unaligned
2680 // 16-byte accesses) makes that a loser.
2681 return MVT::f64;
2682 }
2683 }
2684 // This is a compromise. If we reach here, unaligned accesses may be slow on
2685 // this target. However, creating smaller, aligned accesses could be even
2686 // slower and would certainly be a lot more code.
2687 if (Subtarget.is64Bit() && Op.size() >= 8)
2688 return MVT::i64;
2689 return MVT::i32;
2690}
2691
2692bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2693 if (VT == MVT::f32)
2694 return Subtarget.hasSSE1();
2695 if (VT == MVT::f64)
2696 return Subtarget.hasSSE2();
2697 return true;
2698}
2699
2700bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2701 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
2702 bool *Fast) const {
2703 if (Fast) {
2704 switch (VT.getSizeInBits()) {
2705 default:
2706 // 8-byte and under are always assumed to be fast.
2707 *Fast = true;
2708 break;
2709 case 128:
2710 *Fast = !Subtarget.isUnalignedMem16Slow();
2711 break;
2712 case 256:
2713 *Fast = !Subtarget.isUnalignedMem32Slow();
2714 break;
2715 // TODO: What about AVX-512 (512-bit) accesses?
2716 }
2717 }
2718 // NonTemporal vector memory ops must be aligned.
2719 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2720 // NT loads can only be vector aligned, so if its less aligned than the
2721 // minimum vector size (which we can split the vector down to), we might as
2722 // well use a regular unaligned vector load.
2723 // We don't have any NT loads pre-SSE41.
2724 if (!!(Flags & MachineMemOperand::MOLoad))
2725 return (Alignment < 16 || !Subtarget.hasSSE41());
2726 return false;
2727 }
2728 // Misaligned accesses of any size are always allowed.
2729 return true;
2730}
2731
2732/// Return the entry encoding for a jump table in the
2733/// current function. The returned value is a member of the
2734/// MachineJumpTableInfo::JTEntryKind enum.
2735unsigned X86TargetLowering::getJumpTableEncoding() const {
2736 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2737 // symbol.
2738 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2739 return MachineJumpTableInfo::EK_Custom32;
2740
2741 // Otherwise, use the normal jump table encoding heuristics.
2742 return TargetLowering::getJumpTableEncoding();
2743}
2744
2745bool X86TargetLowering::splitValueIntoRegisterParts(
2746 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
2747 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
2748 bool IsABIRegCopy = CC.has_value();
2749 EVT ValueVT = Val.getValueType();
2750 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
2751 unsigned ValueBits = ValueVT.getSizeInBits();
2752 unsigned PartBits = PartVT.getSizeInBits();
2753 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
2754 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
2755 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
2756 Parts[0] = Val;
2757 return true;
2758 }
2759 return false;
2760}
2761
2762SDValue X86TargetLowering::joinRegisterPartsIntoValue(
2763 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
2764 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
2765 bool IsABIRegCopy = CC.has_value();
2766 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
2767 unsigned ValueBits = ValueVT.getSizeInBits();
2768 unsigned PartBits = PartVT.getSizeInBits();
2769 SDValue Val = Parts[0];
2770
2771 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
2772 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
2773 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
2774 return Val;
2775 }
2776 return SDValue();
2777}
2778
2779bool X86TargetLowering::useSoftFloat() const {
2780 return Subtarget.useSoftFloat();
2781}
2782
2783void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2784 ArgListTy &Args) const {
2785
2786 // Only relabel X86-32 for C / Stdcall CCs.
2787 if (Subtarget.is64Bit())
2788 return;
2789 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2790 return;
2791 unsigned ParamRegs = 0;
2792 if (auto *M = MF->getFunction().getParent())
2793 ParamRegs = M->getNumberRegisterParameters();
2794
2795 // Mark the first N int arguments as having reg
2796 for (auto &Arg : Args) {
2797 Type *T = Arg.Ty;
2798 if (T->isIntOrPtrTy())
2799 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2800 unsigned numRegs = 1;
2801 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2802 numRegs = 2;
2803 if (ParamRegs < numRegs)
2804 return;
2805 ParamRegs -= numRegs;
2806 Arg.IsInReg = true;
2807 }
2808 }
2809}
2810
2811const MCExpr *
2812X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2813 const MachineBasicBlock *MBB,
2814 unsigned uid,MCContext &Ctx) const{
2815 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())(static_cast <bool> (isPositionIndependent() &&
Subtarget.isPICStyleGOT()) ? void (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2815, __extension__
__PRETTY_FUNCTION__))
;
2816 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2817 // entries.
2818 return MCSymbolRefExpr::create(MBB->getSymbol(),
2819 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2820}
2821
2822/// Returns relocation base for the given PIC jumptable.
2823SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2824 SelectionDAG &DAG) const {
2825 if (!Subtarget.is64Bit())
2826 // This doesn't have SDLoc associated with it, but is not really the
2827 // same as a Register.
2828 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2829 getPointerTy(DAG.getDataLayout()));
2830 return Table;
2831}
2832
2833/// This returns the relocation base for the given PIC jumptable,
2834/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2835const MCExpr *X86TargetLowering::
2836getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2837 MCContext &Ctx) const {
2838 // X86-64 uses RIP relative addressing based on the jump table label.
2839 if (Subtarget.isPICStyleRIPRel())
2840 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2841
2842 // Otherwise, the reference is relative to the PIC base.
2843 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2844}
2845
2846std::pair<const TargetRegisterClass *, uint8_t>
2847X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2848 MVT VT) const {
2849 const TargetRegisterClass *RRC = nullptr;
2850 uint8_t Cost = 1;
2851 switch (VT.SimpleTy) {
2852 default:
2853 return TargetLowering::findRepresentativeClass(TRI, VT);
2854 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2855 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2856 break;
2857 case MVT::x86mmx:
2858 RRC = &X86::VR64RegClass;
2859 break;
2860 case MVT::f32: case MVT::f64:
2861 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2862 case MVT::v4f32: case MVT::v2f64:
2863 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2864 case MVT::v8f32: case MVT::v4f64:
2865 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2866 case MVT::v16f32: case MVT::v8f64:
2867 RRC = &X86::VR128XRegClass;
2868 break;
2869 }
2870 return std::make_pair(RRC, Cost);
2871}
2872
2873unsigned X86TargetLowering::getAddressSpace() const {
2874 if (Subtarget.is64Bit())
2875 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2876 return 256;
2877}
2878
2879static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2880 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2881 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2882}
2883
2884static Constant* SegmentOffset(IRBuilderBase &IRB,
2885 int Offset, unsigned AddressSpace) {
2886 return ConstantExpr::getIntToPtr(
2887 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2888 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2889}
2890
2891Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
2892 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2893 // tcbhead_t; use it instead of the usual global variable (see
2894 // sysdeps/{i386,x86_64}/nptl/tls.h)
2895 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2896 if (Subtarget.isTargetFuchsia()) {
2897 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2898 return SegmentOffset(IRB, 0x10, getAddressSpace());
2899 } else {
2900 unsigned AddressSpace = getAddressSpace();
2901 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
2902 // Specially, some users may customize the base reg and offset.
2903 int Offset = M->getStackProtectorGuardOffset();
2904 // If we don't set -stack-protector-guard-offset value:
2905 // %fs:0x28, unless we're using a Kernel code model, in which case
2906 // it's %gs:0x28. gs:0x14 on i386.
2907 if (Offset == INT_MAX2147483647)
2908 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2909
2910 StringRef GuardReg = M->getStackProtectorGuardReg();
2911 if (GuardReg == "fs")
2912 AddressSpace = X86AS::FS;
2913 else if (GuardReg == "gs")
2914 AddressSpace = X86AS::GS;
2915
2916 // Use symbol guard if user specify.
2917 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
2918 if (!GuardSymb.empty()) {
2919 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
2920 if (!GV) {
2921 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
2922 : Type::getInt32Ty(M->getContext());
2923 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
2924 nullptr, GuardSymb, nullptr,
2925 GlobalValue::NotThreadLocal, AddressSpace);
2926 }
2927 return GV;
2928 }
2929
2930 return SegmentOffset(IRB, Offset, AddressSpace);
2931 }
2932 }
2933 return TargetLowering::getIRStackGuard(IRB);
2934}
2935
2936void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2937 // MSVC CRT provides functionalities for stack protection.
2938 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2939 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2940 // MSVC CRT has a global variable holding security cookie.
2941 M.getOrInsertGlobal("__security_cookie",
2942 Type::getInt8PtrTy(M.getContext()));
2943
2944 // MSVC CRT has a function to validate security cookie.
2945 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2946 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2947 Type::getInt8PtrTy(M.getContext()));
2948 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2949 F->setCallingConv(CallingConv::X86_FastCall);
2950 F->addParamAttr(0, Attribute::AttrKind::InReg);
2951 }
2952 return;
2953 }
2954
2955 StringRef GuardMode = M.getStackProtectorGuard();
2956
2957 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2958 if ((GuardMode == "tls" || GuardMode.empty()) &&
2959 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2960 return;
2961 TargetLowering::insertSSPDeclarations(M);
2962}
2963
2964Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2965 // MSVC CRT has a global variable holding security cookie.
2966 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2967 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2968 return M.getGlobalVariable("__security_cookie");
2969 }
2970 return TargetLowering::getSDagStackGuard(M);
2971}
2972
2973Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2974 // MSVC CRT has a function to validate security cookie.
2975 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2976 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2977 return M.getFunction("__security_check_cookie");
2978 }
2979 return TargetLowering::getSSPStackGuardCheck(M);
2980}
2981
2982Value *
2983X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
2984 if (Subtarget.getTargetTriple().isOSContiki())
2985 return getDefaultSafeStackPointerLocation(IRB, false);
2986
2987 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2988 // definition of TLS_SLOT_SAFESTACK in
2989 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2990 if (Subtarget.isTargetAndroid()) {
2991 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2992 // %gs:0x24 on i386
2993 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2994 return SegmentOffset(IRB, Offset, getAddressSpace());
2995 }
2996
2997 // Fuchsia is similar.
2998 if (Subtarget.isTargetFuchsia()) {
2999 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
3000 return SegmentOffset(IRB, 0x18, getAddressSpace());
3001 }
3002
3003 return TargetLowering::getSafeStackPointerLocation(IRB);
3004}
3005
3006//===----------------------------------------------------------------------===//
3007// Return Value Calling Convention Implementation
3008//===----------------------------------------------------------------------===//
3009
3010bool X86TargetLowering::CanLowerReturn(
3011 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3012 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3013 SmallVector<CCValAssign, 16> RVLocs;
3014 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3015 return CCInfo.CheckReturn(Outs, RetCC_X86);
3016}
3017
3018const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
3019 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
3020 return ScratchRegs;
3021}
3022
3023/// Lowers masks values (v*i1) to the local register values
3024/// \returns DAG node after lowering to register type
3025static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
3026 const SDLoc &Dl, SelectionDAG &DAG) {
3027 EVT ValVT = ValArg.getValueType();
3028
3029 if (ValVT == MVT::v1i1)
3030 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
3031 DAG.getIntPtrConstant(0, Dl));
3032
3033 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
3034 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
3035 // Two stage lowering might be required
3036 // bitcast: v8i1 -> i8 / v16i1 -> i16
3037 // anyextend: i8 -> i32 / i16 -> i32
3038 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
3039 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
3040 if (ValLoc == MVT::i32)
3041 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
3042 return ValToCopy;
3043 }
3044
3045 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
3046 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
3047 // One stage lowering is required
3048 // bitcast: v32i1 -> i32 / v64i1 -> i64
3049 return DAG.getBitcast(ValLoc, ValArg);
3050 }
3051
3052 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
3053}
3054
3055/// Breaks v64i1 value into two registers and adds the new node to the DAG
3056static void Passv64i1ArgInRegs(
3057 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
3058 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
3059 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
3060 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")(static_cast <bool> (Subtarget.hasBWI() && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3060, __extension__
__PRETTY_FUNCTION__))
;
3061 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3061, __extension__
__PRETTY_FUNCTION__))
;
3062 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")(static_cast <bool> (Arg.getValueType() == MVT::i64 &&
"Expecting 64 bit value") ? void (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3062, __extension__
__PRETTY_FUNCTION__))
;
3063 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3064, __extension__
__PRETTY_FUNCTION__))
3064 "The value should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3064, __extension__
__PRETTY_FUNCTION__))
;
3065
3066 // Before splitting the value we cast it to i64
3067 Arg = DAG.getBitcast(MVT::i64, Arg);
3068
3069 // Splitting the value into two i32 types
3070 SDValue Lo, Hi;
3071 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
3072 DAG.getConstant(0, Dl, MVT::i32));
3073 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
3074 DAG.getConstant(1, Dl, MVT::i32));
3075
3076 // Attach the two i32 types into corresponding registers
3077 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
3078 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
3079}
3080
/// Lower a function return into an X86ISD::RET_FLAG node (X86ISD::IRET for
/// the X86_INTR calling convention).
///
/// The values in \p OutVals are assigned locations with RetCC_X86, extended
/// or bitcast to their location type as the assignment requests, and copied
/// into their registers through a glued chain of CopyToReg nodes. Values
/// assigned to FP0/FP1 are not copied; they are appended directly as operands
/// of the return node. When an sret return register was recorded for the
/// function, its value is additionally copied into RAX/EAX.
///
/// \returns the return node. Operand #0 is the final chain, operand #1 the
/// number of bytes to pop, followed by the returned values/registers and,
/// when present, the glue.
3081SDValue
3082X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3083 bool isVarArg,
3084 const SmallVectorImpl<ISD::OutputArg> &Outs,
3085 const SmallVectorImpl<SDValue> &OutVals,
3086 const SDLoc &dl, SelectionDAG &DAG) const {
3087 MachineFunction &MF = DAG.getMachineFunction();
3088 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3089
3090 // In some cases we need to disable registers from the default CSR list.
3091 // For example, when they are used for argument passing.
3092 bool ShouldDisableCalleeSavedRegister =
3093 CallConv == CallingConv::X86_RegCall ||
3094 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
3095
3096 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
3097 report_fatal_error("X86 interrupts may not return any value");
3098
3099 SmallVector<CCValAssign, 16> RVLocs;
3100 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
3101 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
3102
3103 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
// I walks the assigned locations while OutsIndex walks the returned values.
// A custom (v64i1) value consumes two locations, so I is advanced a second
// time inside the loop body for that case while OutsIndex is not.
3104 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
3105 ++I, ++OutsIndex) {
3106 CCValAssign &VA = RVLocs[I];
3107 assert(VA.isRegLoc() && "Can only return in registers!")(static_cast <bool> (VA.isRegLoc() && "Can only return in registers!"
) ? void (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3107, __extension__
__PRETTY_FUNCTION__))
;
3108
3109 // Add the register to the CalleeSaveDisableRegs list.
3110 if (ShouldDisableCalleeSavedRegister)
3111 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
3112
3113 SDValue ValToCopy = OutVals[OutsIndex];
3114 EVT ValVT = ValToCopy.getValueType();
3115
3116 // Promote values to the appropriate types.
3117 if (VA.getLocInfo() == CCValAssign::SExt)
3118 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
3119 else if (VA.getLocInfo() == CCValAssign::ZExt)
3120 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
3121 else if (VA.getLocInfo() == CCValAssign::AExt) {
3122 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
3123 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
3124 else
3125 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
3126 }
3127 else if (VA.getLocInfo() == CCValAssign::BCvt)
3128 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
3129
3130 assert(VA.getLocInfo() != CCValAssign::FPExt &&(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3131, __extension__
__PRETTY_FUNCTION__))
3131 "Unexpected FP-extend for return value.")(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3131, __extension__
__PRETTY_FUNCTION__))
;
3132
3133 // Report an error if we have attempted to return a value via an XMM
3134 // register and SSE was disabled.
3135 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3136 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3137 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3138 } else if (!Subtarget.hasSSE2() &&
3139 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3140 ValVT == MVT::f64) {
3141 // When returning a double via an XMM register, report an error if SSE2 is
3142 // not enabled.
3143 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3144 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3145 }
3146
3147 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
3148 // the RET instruction and handled by the FP Stackifier.
3149 if (VA.getLocReg() == X86::FP0 ||
3150 VA.getLocReg() == X86::FP1) {
3151 // If this is a copy from an xmm register to ST(0), use an FPExtend to
3152 // change the value to the FP stack register class.
3153 if (isScalarFPTypeInSSEReg(VA.getValVT()))
3154 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
3155 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3156 // Don't emit a copytoreg.
3157 continue;
3158 }
3159
3160 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
3161 // which is returned in RAX / RDX.
3162 if (Subtarget.is64Bit()) {
3163 if (ValVT == MVT::x86mmx) {
3164 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
3165 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
3166 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
3167 ValToCopy);
3168 // If we don't have SSE2 available, convert to v4f32 so the generated
3169 // register is legal.
3170 if (!Subtarget.hasSSE2())
3171 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
3172 }
3173 }
3174 }
3175
3176 if (VA.needsCustom()) {
3177 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3178, __extension__
__PRETTY_FUNCTION__))
3178 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3178, __extension__
__PRETTY_FUNCTION__))
;
3179
3180 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
3181 Subtarget);
3182
3183 // Add the second register to the CalleeSaveDisableRegs list.
3184 if (ShouldDisableCalleeSavedRegister)
3185 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
3186 } else {
3187 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3188 }
3189 }
3190
3191 SDValue Flag;
3192 SmallVector<SDValue, 6> RetOps;
3193 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3194 // Operand #1 = Bytes To Pop
3195 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
3196 MVT::i32));
3197
3198 // Copy the result values into the output registers.
3199 for (auto &RetVal : RetVals) {
3200 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
3201 RetOps.push_back(RetVal.second);
3202 continue; // Don't emit a copytoreg.
3203 }
3204
3205 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
3206 Flag = Chain.getValue(1);
3207 RetOps.push_back(
3208 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
3209 }
3210
3211 // Swift calling convention does not require we copy the sret argument
3212 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
3213
3214 // All x86 ABIs require that for returning structs by value we copy
3215 // the sret argument into %rax/%eax (depending on ABI) for the return.
3216 // We saved the argument into a virtual register in the entry block,
3217 // so now we copy the value out and into %rax/%eax.
3218 //
3219 // Checking Function.hasStructRetAttr() here is insufficient because the IR
3220 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
3221 // false, then an sret argument may be implicitly inserted in the SelDAG. In
3222 // either case FuncInfo->setSRetReturnReg() will have been called.
3223 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3224 // When we have both sret and another return value, we should use the
3225 // original Chain stored in RetOps[0], instead of the current Chain updated
3226 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
3227
3228 // For the case of sret and another return value, we have
3229 // Chain_0 at the function entry
3230 // Chain_1 = getCopyToReg(Chain_0) in the above loop
3231 // If we use Chain_1 in getCopyFromReg, we will have
3232 // Val = getCopyFromReg(Chain_1)
3233 // Chain_2 = getCopyToReg(Chain_1, Val) from below
3234
3235 // getCopyToReg(Chain_0) will be glued together with
3236 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
3237 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
3238 // Data dependency from Unit B to Unit A due to usage of Val in
3239 // getCopyToReg(Chain_1, Val)
3240 // Chain dependency from Unit A to Unit B
3241
3242 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
3243 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
3244 getPointerTy(MF.getDataLayout()));
3245
3246 Register RetValReg
3247 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
3248 X86::RAX : X86::EAX;
3249 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
3250 Flag = Chain.getValue(1);
3251
3252 // RAX/EAX now acts like a return value.
3253 RetOps.push_back(
3254 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
3255
3256 // Add the returned register to the CalleeSaveDisableRegs list.
3257 if (ShouldDisableCalleeSavedRegister)
3258 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
3259 }
3260
// Callee-saved registers handled via copy are appended as extra operands of
// the return node; only GR64 registers are expected in that list.
3261 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3262 const MCPhysReg *I =
3263 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3264 if (I) {
3265 for (; *I; ++I) {
3266 if (X86::GR64RegClass.contains(*I))
3267 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3268 else
3269 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3269)
;
3270 }
3271 }
3272
3273 RetOps[0] = Chain; // Update chain.
3274
3275 // Add the flag if we have it.
3276 if (Flag.getNode())
3277 RetOps.push_back(Flag);
3278
3279 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
3280 if (CallConv == CallingConv::X86_INTR)
3281 opcode = X86ISD::IRET;
3282 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
3283}
3284
3285bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3286 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
3287 return false;
3288
3289 SDValue TCChain = Chain;
3290 SDNode *Copy = *N->use_begin();
3291 if (Copy->getOpcode() == ISD::CopyToReg) {
3292 // If the copy has a glue operand, we conservatively assume it isn't safe to
3293 // perform a tail call.
3294 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3295 return false;
3296 TCChain = Copy->getOperand(0);
3297 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
3298 return false;
3299
3300 bool HasRet = false;
3301 for (const SDNode *U : Copy->uses()) {
3302 if (U->getOpcode() != X86ISD::RET_FLAG)
3303 return false;
3304 // If we are returning more than one value, we can definitely
3305 // not make a tail call see PR19530
3306 if (U->getNumOperands() > 4)
3307 return false;
3308 if (U->getNumOperands() == 4 &&
3309 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
3310 return false;
3311 HasRet = true;
3312 }
3313
3314 if (!HasRet)
3315 return false;
3316
3317 Chain = TCChain;
3318 return true;
3319}
3320
3321EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
3322 ISD::NodeType ExtendKind) const {
3323 MVT ReturnMVT = MVT::i32;
3324
3325 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
3326 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
3327 // The ABI does not require i1, i8 or i16 to be extended.
3328 //
3329 // On Darwin, there is code in the wild relying on Clang's old behaviour of
3330 // always extending i8/i16 return values, so keep doing that for now.
3331 // (PR26665).
3332 ReturnMVT = MVT::i8;
3333 }
3334
3335 EVT MinVT = getRegisterType(Context, ReturnMVT);
3336 return VT.bitsLT(MinVT) ? MinVT : VT;
3337}
3338
3339/// Reads two 32 bit registers and creates a 64 bit mask value.
3340/// \param VA The current 32 bit value that need to be assigned.
3341/// \param NextVA The next 32 bit value that need to be assigned.
3342/// \param Root The parent DAG node.
3343/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
3344/// glue purposes. In the case the DAG is already using
3345/// physical register instead of virtual, we should glue
3346/// our new SDValue to InFlag SDvalue.
3347/// \return a new SDvalue of size 64bit.
3348static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
3349 SDValue &Root, SelectionDAG &DAG,
3350 const SDLoc &Dl, const X86Subtarget &Subtarget,
3351 SDValue *InFlag = nullptr) {
3352 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(static_cast <bool> ((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3352, __extension__
__PRETTY_FUNCTION__))
;
3353 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3353, __extension__
__PRETTY_FUNCTION__))
;
3354 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3355, __extension__
__PRETTY_FUNCTION__))
3355 "Expecting first location of 64 bit width type")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3355, __extension__
__PRETTY_FUNCTION__))
;
3356 assert(NextVA.getValVT() == VA.getValVT() &&(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3357, __extension__
__PRETTY_FUNCTION__))
3357 "The locations should have the same type")(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3357, __extension__
__PRETTY_FUNCTION__))
;
3358 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3359, __extension__
__PRETTY_FUNCTION__))
3359 "The values should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3359, __extension__
__PRETTY_FUNCTION__))
;
3360
3361 SDValue Lo, Hi;
3362 SDValue ArgValueLo, ArgValueHi;
3363
3364 MachineFunction &MF = DAG.getMachineFunction();
3365 const TargetRegisterClass *RC = &X86::GR32RegClass;
3366
3367 // Read a 32 bit value from the registers.
3368 if (nullptr == InFlag) {
3369 // When no physical register is present,
3370 // create an intermediate virtual register.
3371 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3372 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3373 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3374 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3375 } else {
3376 // When a physical register is available read the value from it and glue
3377 // the reads together.
3378 ArgValueLo =
3379 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
3380 *InFlag = ArgValueLo.getValue(2);
3381 ArgValueHi =
3382 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
3383 *InFlag = ArgValueHi.getValue(2);
3384 }
3385
3386 // Convert the i32 type into v32i1 type.
3387 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
3388
3389 // Convert the i32 type into v32i1 type.
3390 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
3391
3392 // Concatenate the two values together.
3393 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
3394}
3395
3396/// The function will lower a register of various sizes (8/16/32/64)
3397/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
3398/// \returns a DAG node contains the operand after lowering to mask type.
3399static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
3400 const EVT &ValLoc, const SDLoc &Dl,
3401 SelectionDAG &DAG) {
3402 SDValue ValReturned = ValArg;
3403
3404 if (ValVT == MVT::v1i1)
3405 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3406
3407 if (ValVT == MVT::v64i1) {
3408 // In 32 bit machine, this case is handled by getv64i1Argument
3409 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")(static_cast <bool> (ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? void (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3409, __extension__
__PRETTY_FUNCTION__))
;
3410 // In 64 bit machine, There is no need to truncate the value only bitcast
3411 } else {
3412 MVT maskLen;
3413 switch (ValVT.getSimpleVT().SimpleTy) {
3414 case MVT::v8i1:
3415 maskLen = MVT::i8;
3416 break;
3417 case MVT::v16i1:
3418 maskLen = MVT::i16;
3419 break;
3420 case MVT::v32i1:
3421 maskLen = MVT::i32;
3422 break;
3423 default:
3424 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3424)
;
3425 }
3426
3427 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3428 }
3429 return DAG.getBitcast(ValVT, ValReturned);
3430}
3431
3432/// Lower the result values of a call into the
3433/// appropriate copies out of appropriate physical registers.
3434///
3435SDValue X86TargetLowering::LowerCallResult(
3436 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3437 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3438 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3439 uint32_t *RegMask) const {
3440
3441 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3442 // Assign locations to each value returned by this call.
3443 SmallVector<CCValAssign, 16> RVLocs;
3444 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3445 *DAG.getContext());
3446 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3447
3448 // Copy all of the result registers out of their specified physreg.
3449 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3450 ++I, ++InsIndex) {
3451 CCValAssign &VA = RVLocs[I];
3452 EVT CopyVT = VA.getLocVT();
3453
3454 // In some calling conventions we need to remove the used registers
3455 // from the register mask.
3456 if (RegMask) {
3457 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3458 SubRegs.isValid(); ++SubRegs)
3459 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3460 }
3461
3462 // Report an error if there was an attempt to return FP values via XMM
3463 // registers.
3464 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3465 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3466 if (VA.getLocReg() == X86::XMM1)
3467 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3468 else
3469 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3470 } else if (!Subtarget.hasSSE2() &&
3471 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3472 CopyVT == MVT::f64) {
3473 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3474 if (VA.getLocReg() == X86::XMM1)
3475 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3476 else
3477 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3478 }
3479
3480 // If we prefer to use the value in xmm registers, copy it out as f80 and
3481 // use a truncate to move it from fp stack reg to xmm reg.
3482 bool RoundAfterCopy = false;
3483 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3484 isScalarFPTypeInSSEReg(VA.getValVT())) {
3485 if (!Subtarget.hasX87())
3486 report_fatal_error("X87 register return with X87 disabled");
3487 CopyVT = MVT::f80;
3488 RoundAfterCopy = (CopyVT != VA.getLocVT());
3489 }
3490
3491 SDValue Val;
3492 if (VA.needsCustom()) {
3493 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3494, __extension__
__PRETTY_FUNCTION__))
3494 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3494, __extension__
__PRETTY_FUNCTION__))
;
3495 Val =
3496 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3497 } else {
3498 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3499 .getValue(1);
3500 Val = Chain.getValue(0);
3501 InFlag = Chain.getValue(2);
3502 }
3503
3504 if (RoundAfterCopy)
3505 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3506 // This truncation won't change the value.
3507 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
3508
3509 if (VA.isExtInLoc()) {
3510 if (VA.getValVT().isVector() &&
3511 VA.getValVT().getScalarType() == MVT::i1 &&
3512 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3513 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3514 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3515 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3516 } else
3517 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3518 }
3519
3520 if (VA.getLocInfo() == CCValAssign::BCvt)
3521 Val = DAG.getBitcast(VA.getValVT(), Val);
3522
3523 InVals.push_back(Val);
3524 }
3525
3526 return Chain;
3527}
3528
3529//===----------------------------------------------------------------------===//
3530// C & StdCall & Fast Calling Convention implementation
3531//===----------------------------------------------------------------------===//
3532// StdCall calling convention seems to be standard for many Windows' API
3533// routines and around. It differs from C calling convention just a little:
3534// callee should clean up the stack, not caller. Symbols should be also
3535// decorated in some fancy way :) It doesn't support any vector arguments.
3536// For info on fast calling convention see Fast Calling Convention (tail call)
3537// implementation LowerX86_32FastCCCallTo.
3538
3539/// Determines whether Args, either a set of outgoing arguments to a call, or a
3540/// set of incoming args of a call, contains an sret pointer that the callee
3541/// pops
3542template <typename T>
3543static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
3544 const X86Subtarget &Subtarget) {
3545 // Not C++20 (yet), so no concepts available.
3546 static_assert(std::is_same<T, ISD::OutputArg>::value ||
3547 std::is_same<T, ISD::InputArg>::value,
3548 "requires ISD::OutputArg or ISD::InputArg");
3549
3550 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
3551 // for most compilations.
3552 if (!Subtarget.is32Bit())
3553 return false;
3554
3555 if (Args.empty())
3556 return false;
3557
3558 // Most calls do not have an sret argument, check the arg next.
3559 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
3560 if (!Flags.isSRet() || Flags.isInReg())
3561 return false;
3562
3563 // The MSVCabi does not pop the sret.
3564 if (Subtarget.getTargetTriple().isOSMSVCRT())
3565 return false;
3566
3567 // MCUs don't pop the sret
3568 if (Subtarget.isTargetMCU())
3569 return false;
3570
3571 // Callee pops argument
3572 return true;
3573}
3574
3575/// Make a copy of an aggregate at address specified by "Src" to address
3576/// "Dst" with size and alignment information specified by the specific
3577/// parameter attribute. The copy will be passed as a byval function parameter.
3578static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3579 SDValue Chain, ISD::ArgFlagsTy Flags,
3580 SelectionDAG &DAG, const SDLoc &dl) {
3581 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3582
3583 return DAG.getMemcpy(
3584 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3585 /*isVolatile*/ false, /*AlwaysInline=*/true,
3586 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3587}
3588
3589/// Return true if the calling convention is one that we can guarantee TCO for.
3590static bool canGuaranteeTCO(CallingConv::ID CC) {
3591 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3592 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3593 CC == CallingConv::HHVM || CC == CallingConv::Tail ||
3594 CC == CallingConv::SwiftTail);
3595}
3596
3597/// Return true if we might ever do TCO for calls with this calling convention.
3598static bool mayTailCallThisCC(CallingConv::ID CC) {
3599 switch (CC) {
3600 // C calling conventions:
3601 case CallingConv::C:
3602 case CallingConv::Win64:
3603 case CallingConv::X86_64_SysV:
3604 // Callee pop conventions:
3605 case CallingConv::X86_ThisCall:
3606 case CallingConv::X86_StdCall:
3607 case CallingConv::X86_VectorCall:
3608 case CallingConv::X86_FastCall:
3609 // Swift:
3610 case CallingConv::Swift:
3611 return true;
3612 default:
3613 return canGuaranteeTCO(CC);
3614 }
3615}
3616
3617/// Return true if the function is being made into a tailcall target by
3618/// changing its ABI.
3619static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3620 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
3621 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
3622}
3623
3624bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3625 if (!CI->isTailCall())
3626 return false;
3627
3628 CallingConv::ID CalleeCC = CI->getCallingConv();
3629 if (!mayTailCallThisCC(CalleeCC))
3630 return false;
3631
3632 return true;
3633}
3634
3635SDValue
3636X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3637 const SmallVectorImpl<ISD::InputArg> &Ins,
3638 const SDLoc &dl, SelectionDAG &DAG,
3639 const CCValAssign &VA,
3640 MachineFrameInfo &MFI, unsigned i) const {
3641 // Create the nodes corresponding to a load from this parameter slot.
3642 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3643 bool AlwaysUseMutable = shouldGuaranteeTCO(
3644 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3645 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3646 EVT ValVT;
3647 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3648
3649 // If value is passed by pointer we have address passed instead of the value
3650 // itself. No need to extend if the mask value and location share the same
3651 // absolute size.
3652 bool ExtendedInMem =
3653 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3654 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3655
3656 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3657 ValVT = VA.getLocVT();
3658 else
3659 ValVT = VA.getValVT();
3660
3661 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3662 // changed with more analysis.
3663 // In case of tail call optimization mark all arguments mutable. Since they
3664 // could be overwritten by lowering of arguments in case of a tail call.
3665 if (Flags.isByVal()) {
3666 unsigned Bytes = Flags.getByValSize();
3667 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3668
3669 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3670 // can be improved with deeper analysis.
3671 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3672 /*isAliased=*/true);
3673 return DAG.getFrameIndex(FI, PtrVT);
3674 }
3675
3676 EVT ArgVT = Ins[i].ArgVT;
3677
3678 // If this is a vector that has been split into multiple parts, and the
3679 // scalar size of the parts don't match the vector element size, then we can't
3680 // elide the copy. The parts will have padding between them instead of being
3681 // packed like a vector.
3682 bool ScalarizedAndExtendedVector =
3683 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3684 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3685
3686 // This is an argument in memory. We might be able to perform copy elision.
3687 // If the argument is passed directly in memory without any extension, then we
3688 // can perform copy elision. Large vector types, for example, may be passed
3689 // indirectly by pointer.
3690 if (Flags.isCopyElisionCandidate() &&
3691 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3692 !ScalarizedAndExtendedVector) {
3693 SDValue PartAddr;
3694 if (Ins[i].PartOffset == 0) {
3695 // If this is a one-part value or the first part of a multi-part value,
3696 // create a stack object for the entire argument value type and return a
3697 // load from our portion of it. This assumes that if the first part of an
3698 // argument is in memory, the rest will also be in memory.
3699 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3700 /*IsImmutable=*/false);
3701 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3702 return DAG.getLoad(
3703 ValVT, dl, Chain, PartAddr,
3704 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3705 } else {
3706 // This is not the first piece of an argument in memory. See if there is
3707 // already a fixed stack object including this offset. If so, assume it
3708 // was created by the PartOffset == 0 branch above and create a load from
3709 // the appropriate offset into it.
3710 int64_t PartBegin = VA.getLocMemOffset();
3711 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3712 int FI = MFI.getObjectIndexBegin();
3713 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3714 int64_t ObjBegin = MFI.getObjectOffset(FI);
3715 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3716 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3717 break;
3718 }
3719 if (MFI.isFixedObjectIndex(FI)) {
3720 SDValue Addr =
3721 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3722 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3723 return DAG.getLoad(
3724 ValVT, dl, Chain, Addr,
3725 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3726 Ins[i].PartOffset));
3727 }
3728 }
3729 }
3730
3731 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3732 VA.getLocMemOffset(), isImmutable);
3733
3734 // Set SExt or ZExt flag.
3735 if (VA.getLocInfo() == CCValAssign::ZExt) {
3736 MFI.setObjectZExt(FI, true);
3737 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3738 MFI.setObjectSExt(FI, true);
3739 }
3740
3741 MaybeAlign Alignment;
3742 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
3743 ValVT != MVT::f80)
3744 Alignment = MaybeAlign(4);
3745 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3746 SDValue Val = DAG.getLoad(
3747 ValVT, dl, Chain, FIN,
3748 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3749 Alignment);
3750 return ExtendedInMem
3751 ? (VA.getValVT().isVector()
3752 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3753 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3754 : Val;
3755}
3756
3757// FIXME: Get this from tablegen.
3758static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3759 const X86Subtarget &Subtarget) {
3760 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3760, __extension__ __PRETTY_FUNCTION__))
;
3761
3762 if (Subtarget.isCallingConvWin64(CallConv)) {
3763 static const MCPhysReg GPR64ArgRegsWin64[] = {
3764 X86::RCX, X86::RDX, X86::R8, X86::R9
3765 };
3766 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3767 }
3768
3769 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3770 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3771 };
3772 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3773}
3774
3775// FIXME: Get this from tablegen.
3776static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3777 CallingConv::ID CallConv,
3778 const X86Subtarget &Subtarget) {
3779 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3779, __extension__ __PRETTY_FUNCTION__))
;
3780 if (Subtarget.isCallingConvWin64(CallConv)) {
3781 // The XMM registers which might contain var arg parameters are shadowed
3782 // in their paired GPR. So we only need to save the GPR to their home
3783 // slots.
3784 // TODO: __vectorcall will change this.
3785 return None;
3786 }
3787
3788 bool isSoftFloat = Subtarget.useSoftFloat();
3789 if (isSoftFloat || !Subtarget.hasSSE1())
3790 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3791 // registers.
3792 return None;
3793
3794 static const MCPhysReg XMMArgRegs64Bit[] = {
3795 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3796 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3797 };
3798 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3799}
3800
3801#ifndef NDEBUG
3802static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3803 return llvm::is_sorted(
3804 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3805 return A.getValNo() < B.getValNo();
3806 });
3807}
3808#endif
3809
3810namespace {
3811/// This is a helper class for lowering variable arguments parameters.
3812class VarArgsLoweringHelper {
3813public:
3814 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3815 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3816 CallingConv::ID CallConv, CCState &CCInfo)
3817 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3818 TheMachineFunction(DAG.getMachineFunction()),
3819 TheFunction(TheMachineFunction.getFunction()),
3820 FrameInfo(TheMachineFunction.getFrameInfo()),
3821 FrameLowering(*Subtarget.getFrameLowering()),
3822 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3823 CCInfo(CCInfo) {}
3824
3825 // Lower variable arguments parameters.
3826 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3827
3828private:
3829 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3830
3831 void forwardMustTailParameters(SDValue &Chain);
3832
3833 bool is64Bit() const { return Subtarget.is64Bit(); }
3834 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3835
3836 X86MachineFunctionInfo *FuncInfo;
3837 const SDLoc &DL;
3838 SelectionDAG &DAG;
3839 const X86Subtarget &Subtarget;
3840 MachineFunction &TheMachineFunction;
3841 const Function &TheFunction;
3842 MachineFrameInfo &FrameInfo;
3843 const TargetFrameLowering &FrameLowering;
3844 const TargetLowering &TargLowering;
3845 CallingConv::ID CallConv;
3846 CCState &CCInfo;
3847};
3848} // namespace
3849
3850void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3851 SDValue &Chain, unsigned StackSize) {
3852 // If the function takes variable number of arguments, make a frame index for
3853 // the start of the first vararg value... for expansion of llvm.va_start. We
3854 // can skip this if there are no va_start calls.
3855 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3856 CallConv != CallingConv::X86_ThisCall)) {
3857 FuncInfo->setVarArgsFrameIndex(
3858 FrameInfo.CreateFixedObject(1, StackSize, true));
3859 }
3860
3861 // 64-bit calling conventions support varargs and register parameters, so we
3862 // have to do extra work to spill them in the prologue.
3863 if (is64Bit()) {
3864 // Find the first unallocated argument registers.
3865 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3866 ArrayRef<MCPhysReg> ArgXMMs =
3867 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3868 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3869 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3870
3871 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3872, __extension__
__PRETTY_FUNCTION__))
3872 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3872, __extension__
__PRETTY_FUNCTION__))
;
3873
3874 if (isWin64()) {
3875 // Get to the caller-allocated home save location. Add 8 to account
3876 // for the return address.
3877 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3878 FuncInfo->setRegSaveFrameIndex(
3879 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3880 // Fixup to set vararg frame on shadow area (4 x i64).
3881 if (NumIntRegs < 4)
3882 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3883 } else {
3884 // For X86-64, if there are vararg parameters that are passed via
3885 // registers, then we must store them to their spots on the stack so
3886 // they may be loaded by dereferencing the result of va_next.
3887 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3888 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3889 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3890 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3891 }
3892
3893 SmallVector<SDValue, 6>
3894 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3895 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3896 // keeping live input value
3897 SDValue ALVal; // if applicable keeps SDValue for %al register
3898
3899 // Gather all the live in physical registers.
3900 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3901 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3902 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3903 }
3904 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3905 if (!AvailableXmms.empty()) {
3906 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3907 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3908 for (MCPhysReg Reg : AvailableXmms) {
3909 // FastRegisterAllocator spills virtual registers at basic
3910 // block boundary. That leads to usages of xmm registers
3911 // outside of check for %al. Pass physical registers to
3912 // VASTART_SAVE_XMM_REGS to avoid unneccessary spilling.
3913 TheMachineFunction.getRegInfo().addLiveIn(Reg);
3914 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
3915 }
3916 }
3917
3918 // Store the integer parameter registers.
3919 SmallVector<SDValue, 8> MemOps;
3920 SDValue RSFIN =
3921 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3922 TargLowering.getPointerTy(DAG.getDataLayout()));
3923 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3924 for (SDValue Val : LiveGPRs) {
3925 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3926 TargLowering.getPointerTy(DAG.getDataLayout()),
3927 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3928 SDValue Store =
3929 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3930 MachinePointerInfo::getFixedStack(
3931 DAG.getMachineFunction(),
3932 FuncInfo->getRegSaveFrameIndex(), Offset));
3933 MemOps.push_back(Store);
3934 Offset += 8;
3935 }
3936
3937 // Now store the XMM (fp + vector) parameter registers.
3938 if (!LiveXMMRegs.empty()) {
3939 SmallVector<SDValue, 12> SaveXMMOps;
3940 SaveXMMOps.push_back(Chain);
3941 SaveXMMOps.push_back(ALVal);
3942 SaveXMMOps.push_back(RSFIN);
3943 SaveXMMOps.push_back(
3944 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
3945 llvm::append_range(SaveXMMOps, LiveXMMRegs);
3946 MachineMemOperand *StoreMMO =
3947 DAG.getMachineFunction().getMachineMemOperand(
3948 MachinePointerInfo::getFixedStack(
3949 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
3950 Offset),
3951 MachineMemOperand::MOStore, 128, Align(16));
3952 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
3953 DL, DAG.getVTList(MVT::Other),
3954 SaveXMMOps, MVT::i8, StoreMMO));
3955 }
3956
3957 if (!MemOps.empty())
3958 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3959 }
3960}
3961
3962void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3963 // Find the largest legal vector type.
3964 MVT VecVT = MVT::Other;
3965 // FIXME: Only some x86_32 calling conventions support AVX512.
3966 if (Subtarget.useAVX512Regs() &&
3967 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3968 CallConv == CallingConv::Intel_OCL_BI)))
3969 VecVT = MVT::v16f32;
3970 else if (Subtarget.hasAVX())
3971 VecVT = MVT::v8f32;
3972 else if (Subtarget.hasSSE2())
3973 VecVT = MVT::v4f32;
3974
3975 // We forward some GPRs and some vector types.
3976 SmallVector<MVT, 2> RegParmTypes;
3977 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3978 RegParmTypes.push_back(IntVT);
3979 if (VecVT != MVT::Other)
3980 RegParmTypes.push_back(VecVT);
3981
3982 // Compute the set of forwarded registers. The rest are scratch.
3983 SmallVectorImpl<ForwardedRegister> &Forwards =
3984 FuncInfo->getForwardedMustTailRegParms();
3985 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3986
3987 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3988 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3989 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3990 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3991 }
3992
3993 // Copy all forwards from physical to virtual registers.
3994 for (ForwardedRegister &FR : Forwards) {
3995 // FIXME: Can we use a less constrained schedule?
3996 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3997 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
3998 TargLowering.getRegClassFor(FR.VT));
3999 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
4000 }
4001}
4002
4003void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
4004 unsigned StackSize) {
4005 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
4006 // If necessary, it would be set into the correct value later.
4007 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
4008 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4009
4010 if (FrameInfo.hasVAStart())
4011 createVarArgAreaAndStoreRegisters(Chain, StackSize);
4012
4013 if (FrameInfo.hasMustTailInVarArgFunc())
4014 forwardMustTailParameters(Chain);
4015}
4016
4017SDValue X86TargetLowering::LowerFormalArguments(
4018 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4019 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4020 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4021 MachineFunction &MF = DAG.getMachineFunction();
4022 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4023
4024 const Function &F = MF.getFunction();
4025 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
4026 F.getName() == "main")
4027 FuncInfo->setForceFramePointer(true);
4028
4029 MachineFrameInfo &MFI = MF.getFrameInfo();
4030 bool Is64Bit = Subtarget.is64Bit();
4031 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4032
4033 assert((static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4035, __extension__
__PRETTY_FUNCTION__))
4034 !(IsVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4035, __extension__
__PRETTY_FUNCTION__))
4035 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")(static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4035, __extension__
__PRETTY_FUNCTION__))
;
4036
4037 // Assign locations to all of the incoming arguments.
4038 SmallVector<CCValAssign, 16> ArgLocs;
4039 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4040
4041 // Allocate shadow area for Win64.
4042 if (IsWin64)
4043 CCInfo.AllocateStack(32, Align(8));
4044
4045 CCInfo.AnalyzeArguments(Ins, CC_X86);
4046
4047 // In vectorcall calling convention a second pass is required for the HVA
4048 // types.
4049 if (CallingConv::X86_VectorCall == CallConv) {
4050 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
4051 }
4052
4053 // The next loop assumes that the locations are in the same order of the
4054 // input arguments.
4055 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4056, __extension__
__PRETTY_FUNCTION__))
4056 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4056, __extension__
__PRETTY_FUNCTION__))
;
4057
4058 SDValue ArgValue;
4059 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
4060 ++I, ++InsIndex) {
4061 assert(InsIndex < Ins.size() && "Invalid Ins index")(static_cast <bool> (InsIndex < Ins.size() &&
"Invalid Ins index") ? void (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4061, __extension__
__PRETTY_FUNCTION__))
;
4062 CCValAssign &VA = ArgLocs[I];
4063
4064 if (VA.isRegLoc()) {
4065 EVT RegVT = VA.getLocVT();
4066 if (VA.needsCustom()) {
4067 assert((static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4069, __extension__
__PRETTY_FUNCTION__))
4068 VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4069, __extension__
__PRETTY_FUNCTION__))
4069 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4069, __extension__
__PRETTY_FUNCTION__))
;
4070
4071 // v64i1 values, in regcall calling convention, that are
4072 // compiled to 32 bit arch, are split up into two registers.
4073 ArgValue =
4074 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
4075 } else {
4076 const TargetRegisterClass *RC;
4077 if (RegVT == MVT::i8)
4078 RC = &X86::GR8RegClass;
4079 else if (RegVT == MVT::i16)
4080 RC = &X86::GR16RegClass;
4081 else if (RegVT == MVT::i32)
4082 RC = &X86::GR32RegClass;
4083 else if (Is64Bit && RegVT == MVT::i64)
4084 RC = &X86::GR64RegClass;
4085 else if (RegVT == MVT::f16)
4086 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
4087 else if (RegVT == MVT::f32)
4088 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
4089 else if (RegVT == MVT::f64)
4090 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
4091 else if (RegVT == MVT::f80)
4092 RC = &X86::RFP80RegClass;
4093 else if (RegVT == MVT::f128)
4094 RC = &X86::VR128RegClass;
4095 else if (RegVT.is512BitVector())
4096 RC = &X86::VR512RegClass;
4097 else if (RegVT.is256BitVector())
4098 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
4099 else if (RegVT.is128BitVector())
4100 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
4101 else if (RegVT == MVT::x86mmx)
4102 RC = &X86::VR64RegClass;
4103 else if (RegVT == MVT::v1i1)
4104 RC = &X86::VK1RegClass;
4105 else if (RegVT == MVT::v8i1)
4106 RC = &X86::VK8RegClass;
4107 else if (RegVT == MVT::v16i1)
4108 RC = &X86::VK16RegClass;
4109 else if (RegVT == MVT::v32i1)
4110 RC = &X86::VK32RegClass;
4111 else if (RegVT == MVT::v64i1)
4112 RC = &X86::VK64RegClass;
4113 else
4114 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4114)
;
4115
4116 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4117 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4118 }
4119
4120 // If this is an 8 or 16-bit value, it is really passed promoted to 32
4121 // bits. Insert an assert[sz]ext to capture this, then truncate to the
4122 // right size.
4123 if (VA.getLocInfo() == CCValAssign::SExt)
4124 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4125 DAG.getValueType(VA.getValVT()));
4126 else if (VA.getLocInfo() == CCValAssign::ZExt)
4127 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4128 DAG.getValueType(VA.getValVT()));
4129 else if (VA.getLocInfo() == CCValAssign::BCvt)
4130 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
4131
4132 if (VA.isExtInLoc()) {
4133 // Handle MMX values passed in XMM regs.
4134 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
4135 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
4136 else if (VA.getValVT().isVector() &&
4137 VA.getValVT().getScalarType() == MVT::i1 &&
4138 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
4139 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
4140 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
4141 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
4142 } else
4143 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4144 }
4145 } else {
4146 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4146, __extension__ __PRETTY_FUNCTION__))
;
4147 ArgValue =
4148 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
4149 }
4150
4151 // If value is passed via pointer - do a load.
4152 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
4153 ArgValue =
4154 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
4155
4156 InVals.push_back(ArgValue);
4157 }
4158
4159 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
4160 if (Ins[I].Flags.isSwiftAsync()) {
4161 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
4162 if (Subtarget.is64Bit())
4163 X86FI->setHasSwiftAsyncContext(true);
4164 else {
4165 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
4166 X86FI->setSwiftAsyncContextFrameIdx(FI);
4167 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
4168 DAG.getFrameIndex(FI, MVT::i32),
4169 MachinePointerInfo::getFixedStack(MF, FI));
4170 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
4171 }
4172 }
4173
4174 // Swift calling convention does not require we copy the sret argument
4175 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
4176 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
4177 continue;
4178
4179 // All x86 ABIs require that for returning structs by value we copy the
4180 // sret argument into %rax/%eax (depending on ABI) for the return. Save
4181 // the argument into a virtual register so that we can access it from the
4182 // return points.
4183 if (Ins[I].Flags.isSRet()) {
4184 assert(!FuncInfo->getSRetReturnReg() &&(static_cast <bool> (!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set") ? void (0) : __assert_fail
("!FuncInfo->getSRetReturnReg() && \"SRet return has already been set\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4185, __extension__
__PRETTY_FUNCTION__))
4185 "SRet return has already been set")(static_cast <bool> (!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set") ? void (0) : __assert_fail
("!FuncInfo->getSRetReturnReg() && \"SRet return has already been set\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4185, __extension__
__PRETTY_FUNCTION__))
;
4186 MVT PtrTy = getPointerTy(DAG.getDataLayout());
4187 Register Reg =
4188 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
4189 FuncInfo->setSRetReturnReg(Reg);
4190 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
4191 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
4192 break;
4193 }
4194 }
4195
4196 unsigned StackSize = CCInfo.getNextStackOffset();
4197 // Align stack specially for tail calls.
4198 if (shouldGuaranteeTCO(CallConv,
4199 MF.getTarget().Options.GuaranteedTailCallOpt))
4200 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
4201
4202 if (IsVarArg)
4203 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
4204 .lowerVarArgsParameters(Chain, StackSize);
4205
4206 // Some CCs need callee pop.
4207 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
4208 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4209 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
4210 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
4211 // X86 interrupts must pop the error code (and the alignment padding) if
4212 // present.
4213 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
4214 } else {
4215 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
4216 // If this is an sret function, the return should pop the hidden pointer.
4217 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
4218 FuncInfo->setBytesToPopOnReturn(4);
4219 }
4220
4221 if (!Is64Bit) {
4222 // RegSaveFrameIndex is X86-64 only.
4223 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4224 }
4225
4226 FuncInfo->setArgumentStackSize(StackSize);
4227
4228 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
4229 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
4230 if (Personality == EHPersonality::CoreCLR) {
4231 assert(Is64Bit)(static_cast <bool> (Is64Bit) ? void (0) : __assert_fail
("Is64Bit", "llvm/lib/Target/X86/X86ISelLowering.cpp", 4231,
__extension__ __PRETTY_FUNCTION__))
;
4232 // TODO: Add a mechanism to frame lowering that will allow us to indicate
4233 // that we'd prefer this slot be allocated towards the bottom of the frame
4234 // (i.e. near the stack pointer after allocating the frame). Every
4235 // funclet needs a copy of this slot in its (mostly empty) frame, and the
4236 // offset from the bottom of this and each funclet's frame must be the
4237 // same, so the size of funclets' (mostly empty) frames is dictated by
4238 // how far this slot is from the bottom (since they allocate just enough
4239 // space to accommodate holding this slot at the correct offset).
4240 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
4241 EHInfo->PSPSymFrameIdx = PSPSymFI;
4242 }
4243 }
4244
4245 if (CallConv == CallingConv::X86_RegCall ||
4246 F.hasFnAttribute("no_caller_saved_registers")) {
4247 MachineRegisterInfo &MRI = MF.getRegInfo();
4248 for (std::pair<Register, Register> Pair : MRI.liveins())
4249 MRI.disableCalleeSavedRegister(Pair.first);
4250 }
4251
4252 return Chain;
4253}
4254
/// Lower one outgoing call argument that has been assigned a memory location.
/// The destination address is StackPtr plus the offset recorded in VA
/// (VA.getLocMemOffset()), computed in the target's pointer type.
///
/// When isByVal is set, the result is whatever CreateCopyOfByValArgument
/// produces for copying Arg to that address (Flags is forwarded to it).
/// Otherwise the result is a store of Arg to that address, attached to Chain.
4255SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
4256 SDValue Arg, const SDLoc &dl,
4257 SelectionDAG &DAG,
4258 const CCValAssign &VA,
4259 ISD::ArgFlagsTy Flags,
4260 bool isByVal) const {
4261 unsigned LocMemOffset = VA.getLocMemOffset();
 // Destination address = StackPtr + LocMemOffset.
4262 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4263 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4264 StackPtr, PtrOff);
 // byval arguments are copied to the destination instead of being stored.
4265 if (isByVal)
4266 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
4267
 // The alignment is left unset unless this is a 32-bit Windows MSVC target
 // and Arg is not f80; in that case an explicit alignment of 4 is passed to
 // the store.
 // NOTE(review): presumably this reflects the 4-byte stack slot alignment of
 // that ABI -- confirm against the original commit.
4268 MaybeAlign Alignment;
4269 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
4270 Arg.getSimpleValueType() != MVT::f80)
4271 Alignment = MaybeAlign(4);
4272 return DAG.getStore(
4273 Chain, dl, Arg, PtrOff,
4274 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
4275 Alignment);
4276}
4277
4278/// Emit a load of the return address. The load is emitted unconditionally
4279/// here; the caller in LowerCall only invokes this for tail calls with a
/// non-zero FPDiff.
///
/// OutRetAddr is an output: it is first set to the return-address frame index
/// and then overwritten with the load from that slot. The function returns
/// result #1 of the load node, which the caller assigns to its chain.
/// IsTailCall, Is64Bit and FPDiff are not read by the body.
4280SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
4281 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
4282 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
4283 // Look up the frame index of the return address stack slot.
4284 EVT VT = getPointerTy(DAG.getDataLayout());
4285 OutRetAddr = getReturnAddressFrameIndex(DAG);
4286
4287 // Load the "old" return address; the load carries no pointer info.
4288 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
4289 return SDValue(OutRetAddr.getNode(), 1);
4290}
4291
4292/// Emit a store of the return address if tail call
4293/// optimization is performed and it is required (FPDiff!=0).
///
/// When FPDiff is zero nothing is emitted and Chain is returned unchanged.
/// Otherwise a fixed frame object of SlotSize bytes is created at offset
/// FPDiff - SlotSize, RetAddrFrIdx is stored into it, and the store node is
/// returned as the new chain.
4294static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
4295 SDValue Chain, SDValue RetAddrFrIdx,
4296 EVT PtrVT, unsigned SlotSize,
4297 int FPDiff, const SDLoc &dl) {
4298 // No slot movement requested: nothing to store.
4299 if (!FPDiff) return Chain;
4300 // Calculate the new stack slot for the return address. FPDiff is widened
 // to int64_t before SlotSize is subtracted.
 // NOTE(review): the trailing `false` is presumably the IsImmutable flag of
 // CreateFixedObject -- confirm against MachineFrameInfo.
4301 int NewReturnAddrFI =
4302 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
4303 false);
4304 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
 // NOTE(review): this uses DAG.getMachineFunction() while the frame object
 // above was created through MF; presumably both name the same function.
4305 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
4306 MachinePointerInfo::getFixedStack(
4307 DAG.getMachineFunction(), NewReturnAddrFI));
4308 return Chain;
4309}
4310
4311/// Returns a vector shuffle of V1 and V2 for a movs{s|d}, movd
4312/// operation of the width given by VT.
///
/// The shuffle mask built here is <NumElems, 1, 2, ..., NumElems-1>, where
/// NumElems is the element count of VT.
/// NOTE(review): under the usual two-input shuffle-mask numbering, index
/// NumElems selects element 0 of V2 and the remaining indices select from
/// V1 -- confirm against SelectionDAG::getVectorShuffle.
4313static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
4314 SDValue V2) {
4315 unsigned NumElems = VT.getVectorNumElements();
4316 SmallVector<int, 8> Mask;
 // Lane 0 gets index NumElems; every other lane i keeps index i.
4317 Mask.push_back(NumElems);
4318 for (unsigned i = 1; i != NumElems; ++i)
4319 Mask.push_back(i);
4320 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
4321}
4322
4323SDValue
4324X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4325 SmallVectorImpl<SDValue> &InVals) const {
4326 SelectionDAG &DAG = CLI.DAG;
4327 SDLoc &dl = CLI.DL;
4328 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4329 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4330 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4331 SDValue Chain = CLI.Chain;
4332 SDValue Callee = CLI.Callee;
4333 CallingConv::ID CallConv = CLI.CallConv;
4334 bool &isTailCall = CLI.IsTailCall;
4335 bool isVarArg = CLI.IsVarArg;
4336 const auto *CB = CLI.CB;
4337
4338 MachineFunction &MF = DAG.getMachineFunction();
4339 bool Is64Bit = Subtarget.is64Bit();
4340 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4341 bool IsSibcall = false;
4342 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
4343 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
4344 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
4345 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
4346 bool HasNCSR = (CB && isa<CallInst>(CB) &&
4347 CB->hasFnAttr("no_caller_saved_registers"));
4348 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
4349 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
4350 bool IsCFICall = IsIndirectCall && CLI.CFIType;
4351 const Module *M = MF.getMMI().getModule();
4352 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
4353
4354 MachineFunction::CallSiteInfo CSInfo;
4355 if (CallConv == CallingConv::X86_INTR)
4356 report_fatal_error("X86 interrupts may not be called directly");
4357
4358 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
4359 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
4360 // If we are using a GOT, disable tail calls to external symbols with
4361 // default visibility. Tail calling such a symbol requires using a GOT
4362 // relocation, which forces early binding of the symbol. This breaks code
4363 // that require lazy function symbol resolution. Using musttail or
4364 // GuaranteedTailCallOpt will override this.
4365 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4366 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
4367 G->getGlobal()->hasDefaultVisibility()))
4368 isTailCall = false;
4369 }
4370
4371 if (isTailCall && !IsMustTail) {
4372 // Check if it's really possible to do a tail call.
4373 isTailCall = IsEligibleForTailCallOptimization(
4374 Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
4375 Ins, DAG);
4376
4377 // Sibcalls are automatically detected tailcalls which do not require
4378 // ABI changes.
4379 if (!IsGuaranteeTCO && isTailCall)
4380 IsSibcall = true;
4381
4382 if (isTailCall)
4383 ++NumTailCalls;
4384 }
4385
4386 if (IsMustTail && !isTailCall)
4387 report_fatal_error("failed to perform tail call elimination on a call "
4388 "site marked musttail");
4389
4390 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4391, __extension__
__PRETTY_FUNCTION__))
4391 "Var args not supported with calling convention fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4391, __extension__
__PRETTY_FUNCTION__))
;
4392
4393 // Analyze operands of the call, assigning locations to each operand.
4394 SmallVector<CCValAssign, 16> ArgLocs;
4395 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4396
4397 // Allocate shadow area for Win64.
4398 if (IsWin64)
4399 CCInfo.AllocateStack(32, Align(8));
4400
4401 CCInfo.AnalyzeArguments(Outs, CC_X86);
4402
4403 // In vectorcall calling convention a second pass is required for the HVA
4404 // types.
4405 if (CallingConv::X86_VectorCall == CallConv) {
4406 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
4407 }
4408
4409 // Get a count of how many bytes are to be pushed on the stack.
4410 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
4411 if (IsSibcall)
4412 // This is a sibcall. The memory operands are available in caller's
4413 // own caller's stack.
4414 NumBytes = 0;
4415 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
4416 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
4417
4418 int FPDiff = 0;
4419 if (isTailCall &&
4420 shouldGuaranteeTCO(CallConv,
4421 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4422 // Lower arguments at fp - stackoffset + fpdiff.
4423 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
4424
4425 FPDiff = NumBytesCallerPushed - NumBytes;
4426
4427 // Set the delta of movement of the returnaddr stackslot.
4428 // But only set if delta is greater than previous delta.
4429 if (FPDiff < X86Info->getTCReturnAddrDelta())
4430 X86Info->setTCReturnAddrDelta(FPDiff);
4431 }
4432
4433 unsigned NumBytesToPush = NumBytes;
4434 unsigned NumBytesToPop = NumBytes;
4435
4436 // If we have an inalloca argument, all stack space has already been allocated
4437 // for us and is right at the top of the stack. We don't support multiple
4438 // arguments passed in memory when using inalloca.
4439 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4440 NumBytesToPush = 0;
4441 if (!ArgLocs.back().isMemLoc())
4442 report_fatal_error("cannot use inalloca attribute on a register "
4443 "parameter");
4444 if (ArgLocs.back().getLocMemOffset() != 0)
4445 report_fatal_error("any parameter with the inalloca attribute must be "
4446 "the only memory argument");
4447 } else if (CLI.IsPreallocated) {
4448 assert(ArgLocs.back().isMemLoc() &&(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4450, __extension__
__PRETTY_FUNCTION__))
4449 "cannot use preallocated attribute on a register "(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4450, __extension__
__PRETTY_FUNCTION__))
4450 "parameter")(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4450, __extension__
__PRETTY_FUNCTION__))
;
4451 SmallVector<size_t, 4> PreallocatedOffsets;
4452 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4453 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4454 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4455 }
4456 }
4457 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4458 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4459 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4460 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4461 NumBytesToPush = 0;
4462 }
4463
4464 if (!IsSibcall && !IsMustTail)
4465 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4466 NumBytes - NumBytesToPush, dl);
4467
4468 SDValue RetAddrFrIdx;
4469 // Load return address for tail calls.
4470 if (isTailCall && FPDiff)
4471 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4472 Is64Bit, FPDiff, dl);
4473
4474 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4475 SmallVector<SDValue, 8> MemOpChains;
4476 SDValue StackPtr;
4477
4478 // The next loop assumes that the locations are in the same order of the
4479 // input arguments.
4480 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4481, __extension__
__PRETTY_FUNCTION__))
4481 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4481, __extension__
__PRETTY_FUNCTION__))
;
4482
4483 // Walk the register/memloc assignments, inserting copies/loads. In the case
4484 // of tail call optimization, arguments are handled later.
4485 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4486 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4487 ++I, ++OutIndex) {
4488 assert(OutIndex < Outs.size() && "Invalid Out index")(static_cast <bool> (OutIndex < Outs.size() &&
"Invalid Out index") ? void (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4488, __extension__
__PRETTY_FUNCTION__))
;
4489 // Skip inalloca/preallocated arguments, they have already been written.
4490 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4491 if (Flags.isInAlloca() || Flags.isPreallocated())
4492 continue;
4493
4494 CCValAssign &VA = ArgLocs[I];
4495 EVT RegVT = VA.getLocVT();
4496 SDValue Arg = OutVals[OutIndex];
4497 bool isByVal = Flags.isByVal();
4498
4499 // Promote the value if needed.
4500 switch (VA.getLocInfo()) {
4501 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4501)
;
4502 case CCValAssign::Full: break;
4503 case CCValAssign::SExt:
4504 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4505 break;
4506 case CCValAssign::ZExt:
4507 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4508 break;
4509 case CCValAssign::AExt:
4510 if (Arg.getValueType().isVector() &&
4511 Arg.getValueType().getVectorElementType() == MVT::i1)
4512 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4513 else if (RegVT.is128BitVector()) {
4514 // Special case: passing MMX values in XMM registers.
4515 Arg = DAG.getBitcast(MVT::i64, Arg);
4516 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4517 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4518 } else
4519 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4520 break;
4521 case CCValAssign::BCvt:
4522 Arg = DAG.getBitcast(RegVT, Arg);
4523 break;
4524 case CCValAssign::Indirect: {
4525 if (isByVal) {
4526 // Memcpy the argument to a temporary stack slot to prevent
4527 // the caller from seeing any modifications the callee may make
4528 // as guaranteed by the `byval` attribute.
4529 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4530 Flags.getByValSize(),
4531 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4532 SDValue StackSlot =
4533 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4534 Chain =
4535 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4536 // From now on treat this as a regular pointer
4537 Arg = StackSlot;
4538 isByVal = false;
4539 } else {
4540 // Store the argument.
4541 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4542 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4543 Chain = DAG.getStore(
4544 Chain, dl, Arg, SpillSlot,
4545 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4546 Arg = SpillSlot;
4547 }
4548 break;
4549 }
4550 }
4551
4552 if (VA.needsCustom()) {
4553 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4554, __extension__
__PRETTY_FUNCTION__))
4554 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4554, __extension__
__PRETTY_FUNCTION__))
;
4555 // Split v64i1 value into two registers
4556 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4557 } else if (VA.isRegLoc()) {
4558 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4559 const TargetOptions &Options = DAG.getTarget().Options;
4560 if (Options.EmitCallSiteInfo)
4561 CSInfo.emplace_back(VA.getLocReg(), I);
4562 if (isVarArg && IsWin64) {
4563 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4564 // shadow reg if callee is a varargs function.
4565 Register ShadowReg;
4566 switch (VA.getLocReg()) {
4567 case X86::XMM0: ShadowReg = X86::RCX; break;
4568 case X86::XMM1: ShadowReg = X86::RDX; break;
4569 case X86::XMM2: ShadowReg = X86::R8; break;
4570 case X86::XMM3: ShadowReg = X86::R9; break;
4571 }
4572 if (ShadowReg)
4573 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4574 }
4575 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4576 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4576, __extension__ __PRETTY_FUNCTION__))
;
4577 if (!StackPtr.getNode())
4578 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4579 getPointerTy(DAG.getDataLayout()));
4580 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4581 dl, DAG, VA, Flags, isByVal));
4582 }
4583 }
4584
4585 if (!MemOpChains.empty())
4586 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4587
4588 if (Subtarget.isPICStyleGOT()) {
4589 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4590 // GOT pointer (except regcall).
4591 if (!isTailCall) {
4592 // Indirect call with RegCall calling convention may use up all the
4593 // general registers, so it is not suitable to bind the EBX register for
4594 // the GOT address; just let the register allocator handle it.
4595 if (CallConv != CallingConv::X86_RegCall)
4596 RegsToPass.push_back(std::make_pair(
4597 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4598 getPointerTy(DAG.getDataLayout()))));
4599 } else {
4600 // If we are tail calling and generating PIC/GOT style code load the
4601 // address of the callee into ECX. The value in ecx is used as target of
4602 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4603 // for tail calls on PIC/GOT architectures. Normally we would just put the
4604 // address of GOT into ebx and then call target@PLT. But for tail calls
4605 // ebx would be restored (since ebx is callee saved) before jumping to the
4606 // target@PLT.
4607
4608 // Note: The actual moving to ECX is done further down.
4609 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4610 if (G && !G->getGlobal()->hasLocalLinkage() &&
4611 G->getGlobal()->hasDefaultVisibility())
4612 Callee = LowerGlobalAddress(Callee, DAG);
4613 else if (isa<ExternalSymbolSDNode>(Callee))
4614 Callee = LowerExternalSymbol(Callee, DAG);
4615 }
4616 }
4617
4618 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
4619 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
4620 // From AMD64 ABI document:
4621 // For calls that may call functions that use varargs or stdargs
4622 // (prototype-less calls or calls to functions containing ellipsis (...) in
4623 // the declaration) %al is used as hidden argument to specify the number
4624 // of SSE registers used. The contents of %al do not need to match exactly
4625 // the number of registers, but must be an upper bound on the number of SSE
4626 // registers used and is in the range 0 - 8 inclusive.
4627
4628 // Count the number of XMM registers allocated.
4629 static const MCPhysReg XMMArgRegs[] = {
4630 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4631 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4632 };
4633 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4634 assert((Subtarget.hasSSE1() || !NumXMMRegs)(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4635, __extension__
__PRETTY_FUNCTION__))
4635 && "SSE registers cannot be used when SSE is disabled")(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4635, __extension__
__PRETTY_FUNCTION__))
;
4636 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4637 DAG.getConstant(NumXMMRegs, dl,
4638 MVT::i8)));
4639 }
4640
4641 if (isVarArg && IsMustTail) {
4642 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4643 for (const auto &F : Forwards) {
4644 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4645 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4646 }
4647 }
4648
4649 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4650 // don't need this because the eligibility check rejects calls that require
4651 // shuffling arguments passed in memory.
4652 if (!IsSibcall && isTailCall) {
4653 // Force all the incoming stack arguments to be loaded from the stack
4654 // before any new outgoing arguments are stored to the stack, because the
4655 // outgoing stack slots may alias the incoming argument stack slots, and
4656 // the alias isn't otherwise explicit. This is slightly more conservative
4657 // than necessary, because it means that each store effectively depends
4658 // on every argument instead of just those arguments it would clobber.
4659 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4660
4661 SmallVector<SDValue, 8> MemOpChains2;
4662 SDValue FIN;
4663 int FI = 0;
4664 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4665 ++I, ++OutsIndex) {
4666 CCValAssign &VA = ArgLocs[I];
4667
4668 if (VA.isRegLoc()) {
4669 if (VA.needsCustom()) {
4670 assert((CallConv == CallingConv::X86_RegCall) &&(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4671, __extension__
__PRETTY_FUNCTION__))
4671 "Expecting custom case only in regcall calling convention")(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4671, __extension__
__PRETTY_FUNCTION__))
;
4672 // This means that we are in special case where one argument was
4673 // passed through two register locations - Skip the next location
4674 ++I;
4675 }
4676
4677 continue;
4678 }
4679
4680 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4680, __extension__ __PRETTY_FUNCTION__))
;
4681 SDValue Arg = OutVals[OutsIndex];
4682 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4683 // Skip inalloca/preallocated arguments. They don't require any work.
4684 if (Flags.isInAlloca() || Flags.isPreallocated())
4685 continue;
4686 // Create frame index.
4687 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4688 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4689 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4690 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4691
4692 if (Flags.isByVal()) {
4693 // Copy relative to framepointer.
4694 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4695 if (!StackPtr.getNode())
4696 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4697 getPointerTy(DAG.getDataLayout()));
4698 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4699 StackPtr, Source);
4700
4701 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4702 ArgChain,
4703 Flags, DAG, dl));
4704 } else {
4705 // Store relative to framepointer.
4706 MemOpChains2.push_back(DAG.getStore(
4707 ArgChain, dl, Arg, FIN,
4708 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4709 }
4710 }
4711
4712 if (!MemOpChains2.empty())
4713 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4714
4715 // Store the return address to the appropriate stack slot.
4716 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4717 getPointerTy(DAG.getDataLayout()),
4718 RegInfo->getSlotSize(), FPDiff, dl);
4719 }
4720
4721 // Build a sequence of copy-to-reg nodes chained together with token chain
4722 // and flag operands which copy the outgoing args into registers.
4723 SDValue InFlag;
4724 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4725 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4726 RegsToPass[i].second, InFlag);
4727 InFlag = Chain.getValue(1);
4728 }
4729
4730 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4731 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")(static_cast <bool> (Is64Bit && "Large code model is only legal in 64-bit mode."
) ? void (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4731, __extension__
__PRETTY_FUNCTION__))
;
4732 // In the 64-bit large code model, we have to make all calls
4733 // through a register, since the call instruction's 32-bit
4734 // pc-relative offset may not be large enough to hold the whole
4735 // address.
4736 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4737 Callee->getOpcode() == ISD::ExternalSymbol) {
4738 // Lower direct calls to global addresses and external symbols. Setting
4739 // ForCall to true here has the effect of removing WrapperRIP when possible
4740 // to allow direct calls to be selected without first materializing the
4741 // address into a register.
4742 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4743 } else if (Subtarget.isTarget64BitILP32() &&
4744 Callee.getValueType() == MVT::i32) {
4745 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4746 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4747 }
4748
4749 // Returns a chain & a flag for retval copy to use.
4750 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4751 SmallVector<SDValue, 8> Ops;
4752
4753 if (!IsSibcall && isTailCall && !IsMustTail) {
4754 Chain = DAG.getCALLSEQ_END(Chain,
4755 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4756 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4757 InFlag = Chain.getValue(1);
4758 }
4759
4760 Ops.push_back(Chain);
4761 Ops.push_back(Callee);
4762
4763 if (isTailCall)
4764 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4765
4766 // Add argument registers to the end of the list so that they are known live
4767 // into the call.
4768 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4769 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4770 RegsToPass[i].second.getValueType()));
4771
4772 // Add a register mask operand representing the call-preserved registers.
4773 const uint32_t *Mask = [&]() {
4774 auto AdaptedCC = CallConv;
4775 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
4776 // use X86_INTR calling convention because it has the same CSR mask
4777 // (same preserved registers).
4778 if (HasNCSR)
4779 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
4780 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
4781 // to use the CSR_NoRegs_RegMask.
4782 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
4783 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
4784 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
4785 }();
4786 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4786, __extension__
__PRETTY_FUNCTION__))
;
4787
4788 // If this is an invoke in a 32-bit function using a funclet-based
4789 // personality, assume the function clobbers all registers. If an exception
4790 // is thrown, the runtime will not restore CSRs.
4791 // FIXME: Model this more precisely so that we can register allocate across
4792 // the normal edge and spill and fill across the exceptional edge.
4793 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4794 const Function &CallerFn = MF.getFunction();
4795 EHPersonality Pers =
4796 CallerFn.hasPersonalityFn()
4797 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4798 : EHPersonality::Unknown;
4799 if (isFuncletEHPersonality(Pers))
4800 Mask = RegInfo->getNoPreservedMask();
4801 }
4802
4803 // Define a new register mask from the existing mask.
4804 uint32_t *RegMask = nullptr;
4805
4806 // In some calling conventions we need to remove the used physical registers
4807 // from the reg mask.
4808 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4809 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4810
4811 // Allocate a new Reg Mask and copy Mask.
4812 RegMask = MF.allocateRegMask();
4813 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4814 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4815
4816 // Make sure all sub registers of the argument registers are reset
4817 // in the RegMask.
4818 for (auto const &RegPair : RegsToPass)
4819 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4820 SubRegs.isValid(); ++SubRegs)
4821 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4822
4823 // Create the RegMask Operand according to our updated mask.
4824 Ops.push_back(DAG.getRegisterMask(RegMask));
4825 } else {
4826 // Create the RegMask Operand according to the static mask.
4827 Ops.push_back(DAG.getRegisterMask(Mask));
4828 }
4829
4830 if (InFlag.getNode())
4831 Ops.push_back(InFlag);
4832
4833 if (isTailCall) {
4834 // We used to do:
4835 //// If this is the first return lowered for this function, add the regs
4836 //// to the liveout set for the function.
4837 // This isn't right, although it's probably harmless on x86; liveouts
4838 // should be computed from returns not tail calls. Consider a void
4839 // function making a tail call to a function returning int.
4840 MF.getFrameInfo().setHasTailCall();
4841 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4842
4843 if (IsCFICall)
4844 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
4845
4846 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4847 return Ret;
4848 }
4849
4850 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4851 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4852 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4853 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
4854 // expanded to the call, directly followed by a special marker sequence and
4855 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
4856 assert(!isTailCall &&(static_cast <bool> (!isTailCall && "tail calls cannot be marked with clang.arc.attachedcall"
) ? void (0) : __assert_fail ("!isTailCall && \"tail calls cannot be marked with clang.arc.attachedcall\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4857, __extension__
__PRETTY_FUNCTION__))
4857 "tail calls cannot be marked with clang.arc.attachedcall")(static_cast <bool> (!isTailCall && "tail calls cannot be marked with clang.arc.attachedcall"
) ? void (0) : __assert_fail ("!isTailCall && \"tail calls cannot be marked with clang.arc.attachedcall\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4857, __extension__
__PRETTY_FUNCTION__))
;
4858 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode")(static_cast <bool> (Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode"
) ? void (0) : __assert_fail ("Is64Bit && \"clang.arc.attachedcall is only supported in 64bit mode\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4858, __extension__
__PRETTY_FUNCTION__))
;
4859
4860 // Add a target global address for the retainRV/claimRV runtime function
4861 // just before the call target.
4862 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
4863 auto PtrVT = getPointerTy(DAG.getDataLayout());
4864 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
4865 Ops.insert(Ops.begin() + 1, GA);
4866 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
4867 } else {
4868 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4869 }
4870
4871 if (IsCFICall)
4872 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
4873
4874 InFlag = Chain.getValue(1);
4875 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4876 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4877
4878 // Save heapallocsite metadata.
4879 if (CLI.CB)
4880 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4881 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4882
4883 // Create the CALLSEQ_END node.
4884 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
4885 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4886 DAG.getTarget().Options.GuaranteedTailCallOpt))
4887 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4888 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
4889 // If this call passes a struct-return pointer, the callee
4890 // pops that struct pointer.
4891 NumBytesForCalleeToPop = 4;
4892
4893 // Returns a flag for retval copy to use.
4894 if (!IsSibcall) {
4895 Chain = DAG.getCALLSEQ_END(Chain,
4896 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4897 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4898 true),
4899 InFlag, dl);
4900 InFlag = Chain.getValue(1);
4901 }
4902
4903 // Handle result values, copying them out of physregs into vregs that we
4904 // return.
4905 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4906 InVals, RegMask);
4907}
4908
4909//===----------------------------------------------------------------------===//
4910// Fast Calling Convention (tail call) implementation
4911//===----------------------------------------------------------------------===//
4912
4913// Like stdcall, the callee cleans up the arguments, except that ECX is
4914// reserved for storing the tail-called function's address. Only 2 registers
4915// are free for argument passing (inreg). Tail call optimization is performed
4916// provided:
4917// * tailcallopt is enabled
4918// * caller/callee are fastcc
4919// On X86_64 architecture with GOT-style position independent code only local
4920// (within module) calls are supported at the moment.
4921// To keep the stack aligned according to platform abi the function
4922// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4923// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
4924// If a tail called function callee has more arguments than the caller the
4925// caller needs to make sure that there is room to move the RETADDR to. This is
4926// achieved by reserving an area the size of the argument delta right after the
4927// original RETADDR, but before the saved framepointer or the spilled registers
4928// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4929// stack layout:
4930// arg1
4931// arg2
4932// RETADDR
4933// [ new RETADDR
4934// move area ]
4935// (possible EBP)
4936// ESI
4937// EDI
4938// local1 ..
4939
4940/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4941/// requirement.
4942unsigned
4943X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4944 SelectionDAG &DAG) const {
4945 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4946 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4947 assert(StackSize % SlotSize == 0 &&(static_cast <bool> (StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize") ? void (0) : __assert_fail
("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4948, __extension__
__PRETTY_FUNCTION__))
4948 "StackSize must be a multiple of SlotSize")(static_cast <bool> (StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize") ? void (0) : __assert_fail
("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4948, __extension__
__PRETTY_FUNCTION__))
;
4949 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4950}
4951
4952/// Return true if the given stack call argument is already available in the
4953/// same position (relatively) of the caller's incoming argument stack.
4954static
4955bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4956 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4957 const X86InstrInfo *TII, const CCValAssign &VA) {
4958 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4959
4960 for (;;) {
4961 // Look through nodes that don't alter the bits of the incoming value.
4962 unsigned Op = Arg.getOpcode();
4963 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4964 Arg = Arg.getOperand(0);
4965 continue;
4966 }
4967 if (Op == ISD::TRUNCATE) {
4968 const SDValue &TruncInput = Arg.getOperand(0);
4969 if (TruncInput.getOpcode() == ISD::AssertZext &&
4970 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4971 Arg.getValueType()) {
4972 Arg = TruncInput.getOperand(0);
4973 continue;
4974 }
4975 }
4976 break;
4977 }
4978
4979 int FI = INT_MAX2147483647;
4980 if (Arg.getOpcode() == ISD::CopyFromReg) {
4981 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4982 if (!VR.isVirtual())
4983 return false;
4984 MachineInstr *Def = MRI->getVRegDef(VR);
4985 if (!Def)
4986 return false;
4987 if (!Flags.isByVal()) {
4988 if (!TII->isLoadFromStackSlot(*Def, FI))
4989 return false;
4990 } else {
4991 unsigned Opcode = Def->getOpcode();
4992 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4993 Opcode == X86::LEA64_32r) &&
4994 Def->getOperand(1).isFI()) {
4995 FI = Def->getOperand(1).getIndex();
4996 Bytes = Flags.getByValSize();
4997 } else
4998 return false;
4999 }
5000 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
5001 if (Flags.isByVal())
5002 // ByVal argument is passed in as a pointer but it's now being
5003 // dereferenced. e.g.
5004 // define @foo(%struct.X* %A) {
5005 // tail call @bar(%struct.X* byval %A)
5006 // }
5007 return false;
5008 SDValue Ptr = Ld->getBasePtr();
5009 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
5010 if (!FINode)
5011 return false;
5012 FI = FINode->getIndex();
5013 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
5014 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
5015 FI = FINode->getIndex();
5016 Bytes = Flags.getByValSize();
5017 } else
5018 return false;
5019
5020 assert(FI != INT_MAX)(static_cast <bool> (FI != 2147483647) ? void (0) : __assert_fail
("FI != INT_MAX", "llvm/lib/Target/X86/X86ISelLowering.cpp",
5020, __extension__ __PRETTY_FUNCTION__))
;
5021 if (!MFI.isFixedObjectIndex(FI))
5022 return false;
5023
5024 if (Offset != MFI.getObjectOffset(FI))
5025 return false;
5026
5027 // If this is not byval, check that the argument stack object is immutable.
5028 // inalloca and argument copy elision can create mutable argument stack
5029 // objects. Byval objects can be mutated, but a byval call intends to pass the
5030 // mutated memory.
5031 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
5032 return false;
5033
5034 if (VA.getLocVT().getFixedSizeInBits() >
5035 Arg.getValueSizeInBits().getFixedSize()) {
5036 // If the argument location is wider than the argument type, check that any
5037 // extension flags match.
5038 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
5039 Flags.isSExt() != MFI.isObjectSExt(FI)) {
5040 return false;
5041 }
5042 }
5043
5044 return Bytes == MFI.getObjectSize(FI);
5045}
5046
5047/// Check whether the call is eligible for tail call optimization. Targets
5048/// that want to do tail call optimization should implement this function.
5049bool X86TargetLowering::IsEligibleForTailCallOptimization(
5050 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
5051 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
5052 const SmallVectorImpl<SDValue> &OutVals,
5053 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5054 if (!mayTailCallThisCC(CalleeCC))
5055 return false;
5056
5057 // If -tailcallopt is specified, make fastcc functions tail-callable.
5058 MachineFunction &MF = DAG.getMachineFunction();
5059 const Function &CallerF = MF.getFunction();
5060
5061 // If the function return type is x86_fp80 and the callee return type is not,
5062 // then the FP_EXTEND of the call result is not a nop. It's not safe to
5063 // perform a tailcall optimization here.
5064 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
5065 return false;
5066
5067 CallingConv::ID CallerCC = CallerF.getCallingConv();
5068 bool CCMatch = CallerCC == CalleeCC;
5069 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
5070 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
5071 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
5072 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
5073
5074 // Win64 functions have extra shadow space for argument homing. Don't do the
5075 // sibcall if the caller and callee have mismatched expectations for this
5076 // space.
5077 if (IsCalleeWin64 != IsCallerWin64)
5078 return false;
5079
5080 if (IsGuaranteeTCO) {
5081 if (canGuaranteeTCO(CalleeCC) && CCMatch)
5082 return true;
5083 return false;
5084 }
5085
5086 // Look for obvious safe cases to perform tail call optimization that do not
5087 // require ABI changes. This is what gcc calls sibcall.
5088
5089 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
5090 // emit a special epilogue.
5091 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5092 if (RegInfo->hasStackRealignment(MF))
5093 return false;
5094
5095 // Also avoid sibcall optimization if we're an sret return fn and the callee
5096 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
5097 // insufficient.
5098 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
5099 // For a compatible tail call the callee must return our sret pointer. So it
5100 // needs to be (a) an sret function itself and (b) we pass our sret as its
5101 // sret. Condition #b is harder to determine.
5102 return false;
5103 } else if (IsCalleePopSRet)
5104 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
5105 // expect that.
5106 return false;
5107
5108 // Do not sibcall optimize vararg calls unless all arguments are passed via
5109 // registers.
5110 LLVMContext &C = *DAG.getContext();
5111 if (isVarArg && !Outs.empty()) {
5112 // Optimizing for varargs on Win64 is unlikely to be safe without
5113 // additional testing.
5114 if (IsCalleeWin64 || IsCallerWin64)
5115 return false;
5116
5117 SmallVector<CCValAssign, 16> ArgLocs;
5118 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5119
5120 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
5121 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
5122 if (!ArgLocs[i].isRegLoc())
5123 return false;
5124 }
5125
5126 // If the call result is in ST0 / ST1, it needs to be popped off the x87
5127 // stack. Therefore, if it's not used by the call it is not safe to optimize
5128 // this into a sibcall.
5129 bool Unused = false;
5130 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5131 if (!Ins[i].Used) {
5132 Unused = true;
5133 break;
5134 }
5135 }
5136 if (Unused) {
5137 SmallVector<CCValAssign, 16> RVLocs;
5138 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
5139 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
5140 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5141 CCValAssign &VA = RVLocs[i];
5142 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
5143 return false;
5144 }
5145 }
5146
5147 // Check that the call results are passed in the same way.
5148 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5149 RetCC_X86, RetCC_X86))
5150 return false;
5151 // The callee has to preserve all registers the caller needs to preserve.
5152 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
5153 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5154 if (!CCMatch) {
5155 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5156 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5157 return false;
5158 }
5159
5160 unsigned StackArgsSize = 0;
5161
5162 // If the callee takes no arguments then go on to check the results of the
5163 // call.
5164 if (!Outs.empty()) {
5165 // Check if stack adjustment is needed. For now, do not do this if any
5166 // argument is passed on the stack.
5167 SmallVector<CCValAssign, 16> ArgLocs;
5168 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5169
5170 // Allocate shadow area for Win64
5171 if (IsCalleeWin64)
5172 CCInfo.AllocateStack(32, Align(8));
5173
5174 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
5175 StackArgsSize = CCInfo.getNextStackOffset();
5176
5177 if (CCInfo.getNextStackOffset()) {
5178 // Check if the arguments are already laid out in the right way as
5179 // the caller's fixed stack objects.
5180 MachineFrameInfo &MFI = MF.getFrameInfo();
5181 const MachineRegisterInfo *MRI = &MF.getRegInfo();
5182 const X86InstrInfo *TII = Subtarget.getInstrInfo();
5183 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5184 CCValAssign &VA = ArgLocs[i];
5185 SDValue Arg = OutVals[i];
5186 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5187 if (VA.getLocInfo() == CCValAssign::Indirect)
5188 return false;
5189 if (!VA.isRegLoc()) {
5190 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
5191 MFI, MRI, TII, VA))
5192 return false;
5193 }
5194 }
5195 }
5196
5197 bool PositionIndependent = isPositionIndependent();
5198 // If the tailcall address may be in a register, then make sure it's
5199 // possible to register allocate for it. In 32-bit, the call address can
5200 // only target EAX, EDX, or ECX since the tail call must be scheduled after
5201 // callee-saved registers are restored. These happen to be the same
5202 // registers used to pass 'inreg' arguments so watch out for those.
5203 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
5204 !isa<ExternalSymbolSDNode>(Callee)) ||
5205 PositionIndependent)) {
5206 unsigned NumInRegs = 0;
5207 // In PIC we need an extra register to formulate the address computation
5208 // for the callee.
5209 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
5210
5211 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5212 CCValAssign &VA = ArgLocs[i];
5213 if (!VA.isRegLoc())
5214 continue;
5215 Register Reg = VA.getLocReg();
5216 switch (Reg) {
5217 default: break;
5218 case X86::EAX: case X86::EDX: case X86::ECX:
5219 if (++NumInRegs == MaxInRegs)
5220 return false;
5221 break;
5222 }
5223 }
5224 }
5225
5226 const MachineRegisterInfo &MRI = MF.getRegInfo();
5227 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5228 return false;
5229 }
5230
5231 bool CalleeWillPop =
5232 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
5233 MF.getTarget().Options.GuaranteedTailCallOpt);
5234
5235 if (unsigned BytesToPop =
5236 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
5237 // If we have bytes to pop, the callee must pop them.
5238 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
5239 if (!CalleePopMatches)
5240 return false;
5241 } else if (CalleeWillPop && StackArgsSize > 0) {
5242 // If we don't have bytes to pop, make sure the callee doesn't pop any.
5243 return false;
5244 }
5245
5246 return true;
5247}
5248
/// Forward to X86::createFastISel with the same \p funcInfo and \p libInfo
/// and return whatever it returns.
5249FastISel *
5250X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
5251 const TargetLibraryInfo *libInfo) const {
5252 return X86::createFastISel(funcInfo, libInfo);
5253}
5254
5255//===----------------------------------------------------------------------===//
5256// Other Lowering Hooks
5257//===----------------------------------------------------------------------===//
5258
5259bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
5260 bool AssumeSingleUse) {
5261 if (!AssumeSingleUse && !Op.hasOneUse())
5262 return false;
5263 if (!ISD::isNormalLoad(Op.getNode()))
5264 return false;
5265
5266 // If this is an unaligned vector, make sure the target supports folding it.
5267 auto *Ld = cast<LoadSDNode>(Op.getNode());
5268 if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
5269 Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
5270 return false;
5271
5272 // TODO: If this is a non-temporal load and the target has an instruction
5273 // for it, it should not be folded. See "useNonTemporalLoad()".
5274
5275 return true;
5276}
5277
5278bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
5279 const X86Subtarget &Subtarget,
5280 bool AssumeSingleUse) {
5281 assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory")(static_cast <bool> (Subtarget.hasAVX() && "Expected AVX for broadcast from memory"
) ? void (0) : __assert_fail ("Subtarget.hasAVX() && \"Expected AVX for broadcast from memory\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5281, __extension__
__PRETTY_FUNCTION__))
;
5282 if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
5283 return false;
5284
5285 // We can not replace a wide volatile load with a broadcast-from-memory,
5286 // because that would narrow the load, which isn't legal for volatiles.
5287 auto *Ld = cast<LoadSDNode>(Op.getNode());
5288 return !Ld->isVolatile() ||
5289 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
5290}
5291
5292bool X86::mayFoldIntoStore(SDValue Op) {
5293 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
5294}
5295
5296bool X86::mayFoldIntoZeroExtend(SDValue Op) {
5297 if (Op.hasOneUse()) {
5298 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
5299 return (ISD::ZERO_EXTEND == Opcode);
5300 }
5301 return false;
5302}
5303
5304static bool isTargetShuffle(unsigned Opcode) {
5305 switch(Opcode) {
5306 default: return false;
5307 case X86ISD::BLENDI:
5308 case X86ISD::PSHUFB:
5309 case X86ISD::PSHUFD:
5310 case X86ISD::PSHUFHW:
5311 case X86ISD::PSHUFLW:
5312 case X86ISD::SHUFP:
5313 case X86ISD::INSERTPS:
5314 case X86ISD::EXTRQI:
5315 case X86ISD::INSERTQI:
5316 case X86ISD::VALIGN:
5317 case X86ISD::PALIGNR:
5318 case X86ISD::VSHLDQ:
5319 case X86ISD::VSRLDQ:
5320 case X86ISD::MOVLHPS:
5321 case X86ISD::MOVHLPS:
5322 case X86ISD::MOVSHDUP:
5323 case X86ISD::MOVSLDUP:
5324 case X86ISD::MOVDDUP:
5325 case X86ISD::MOVSS:
5326 case X86ISD::MOVSD:
5327 case X86ISD::MOVSH:
5328 case X86ISD::UNPCKL:
5329 case X86ISD::UNPCKH:
5330 case X86ISD::VBROADCAST:
5331 case X86ISD::VPERMILPI:
5332 case X86ISD::VPERMILPV:
5333 case X86ISD::VPERM2X128:
5334 case X86ISD::SHUF128:
5335 case X86ISD::VPERMIL2:
5336 case X86ISD::VPERMI:
5337 case X86ISD::VPPERM:
5338 case X86ISD::VPERMV:
5339 case X86ISD::VPERMV3:
5340 case X86ISD::VZEXT_MOVL:
5341 return true;
5342 }
5343}
5344
5345static bool isTargetShuffleVariableMask(unsigned Opcode) {
5346 switch (Opcode) {
5347 default: return false;
5348 // Target Shuffles.
5349 case X86ISD::PSHUFB:
5350 case X86ISD::VPERMILPV:
5351 case X86ISD::VPERMIL2:
5352 case X86ISD::VPPERM:
5353 case X86ISD::VPERMV:
5354 case X86ISD::VPERMV3:
5355 return true;
5356 // 'Faux' Target Shuffles.
5357 case ISD::OR:
5358 case ISD::AND:
5359 case X86ISD::ANDNP:
5360 return true;
5361 }
5362}
5363
5364SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
5365 MachineFunction &MF = DAG.getMachineFunction();
5366 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5367 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
5368 int ReturnAddrIndex = FuncInfo->getRAIndex();
5369
5370 if (ReturnAddrIndex == 0) {
5371 // Set up a frame object for the return address.
5372 unsigned SlotSize = RegInfo->getSlotSize();
5373 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
5374 -(int64_t)SlotSize,
5375 false);
5376 FuncInfo->setRAIndex(ReturnAddrIndex);
5377 }
5378
5379 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
5380}
5381
5382bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
5383 bool hasSymbolicDisplacement) {
5384 // Offset should fit into 32 bit immediate field.
5385 if (!isInt<32>(Offset))
5386 return false;
5387
5388 // If we don't have a symbolic displacement - we don't have any extra
5389 // restrictions.
5390 if (!hasSymbolicDisplacement)
5391 return true;
5392
5393 // FIXME: Some tweaks might be needed for medium code model.
5394 if (M != CodeModel::Small && M != CodeModel::Kernel)
5395 return false;
5396
5397 // For small code model we assume that latest object is 16MB before end of 31
5398 // bits boundary. We may also accept pretty large negative constants knowing
5399 // that all objects are in the positive half of address space.
5400 if (M == CodeModel::Small && Offset < 16*1024*1024)
5401 return true;
5402
5403 // For kernel code model we know that all object resist in the negative half
5404 // of 32bits address space. We may not accept negative offsets, since they may
5405 // be just off and we may accept pretty large positive ones.
5406 if (M == CodeModel::Kernel && Offset >= 0)
5407 return true;
5408
5409 return false;
5410}
5411
5412/// Determines whether the callee is required to pop its own arguments.
5413/// Callee pop is necessary to support tail calls.
5414bool X86::isCalleePop(CallingConv::ID CallingConv,
5415 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
5416 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
5417 // can guarantee TCO.
5418 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
5419 return true;
5420
5421 switch (CallingConv) {
5422 default:
5423 return false;
5424 case CallingConv::X86_StdCall:
5425 case CallingConv::X86_FastCall:
5426 case CallingConv::X86_ThisCall:
5427 case CallingConv::X86_VectorCall:
5428 return !is64Bit;
5429 }
5430}
5431
5432/// Return true if the condition is an signed comparison operation.
5433static bool isX86CCSigned(unsigned X86CC) {
5434 switch (X86CC) {
5435 default:
5436 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5436)
;
5437 case X86::COND_E:
5438 case X86::COND_NE:
5439 case X86::COND_B:
5440 case X86::COND_A:
5441 case X86::COND_BE:
5442 case X86::COND_AE:
5443 return false;
5444 case X86::COND_G:
5445 case X86::COND_GE:
5446 case X86::COND_L:
5447 case X86::COND_LE:
5448 return true;
5449 }
5450}
5451
5452static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
5453 switch (SetCCOpcode) {
5454 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5454)
;
5455 case ISD::SETEQ: return X86::COND_E;
5456 case ISD::SETGT: return X86::COND_G;
5457 case ISD::SETGE: return X86::COND_GE;
5458 case ISD::SETLT: return X86::COND_L;
5459 case ISD::SETLE: return X86::COND_LE;
5460 case ISD::SETNE: return X86::COND_NE;
5461 case ISD::SETULT: return X86::COND_B;
5462 case ISD::SETUGT: return X86::COND_A;
5463 case ISD::SETULE: return X86::COND_BE;
5464 case ISD::SETUGE: return X86::COND_AE;
5465 }
5466}
5467
5468/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
5469/// condition code, returning the condition code and the LHS/RHS of the
5470/// comparison to make.
5471static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
5472 bool isFP, SDValue &LHS, SDValue &RHS,
5473 SelectionDAG &DAG) {
5474 if (!isFP) {
5475 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5476 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
5477 // X > -1 -> X == 0, jump !sign.
5478 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5479 return X86::COND_NS;
5480 }
5481 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
5482 // X < 0 -> X == 0, jump on sign.
5483 return X86::COND_S;
5484 }
5485 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
5486 // X >= 0 -> X == 0, jump on !sign.
5487 return X86::COND_NS;
5488 }
5489 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5490 // X < 1 -> X <= 0
5491 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5492 return X86::COND_LE;
5493 }
5494 }
5495
5496 return TranslateIntegerX86CC(SetCCOpcode);
5497 }
5498
5499 // First determine if it is required or is profitable to flip the operands.
5500
5501 // If LHS is a foldable load, but RHS is not, flip the condition.
5502 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5503 !ISD::isNON_EXTLoad(RHS.getNode())) {
5504 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5505 std::swap(LHS, RHS);
5506 }
5507
5508 switch (SetCCOpcode) {
5509 default: break;
5510 case ISD::SETOLT:
5511 case ISD::SETOLE:
5512 case ISD::SETUGT:
5513 case ISD::SETUGE:
5514 std::swap(LHS, RHS);
5515 break;
5516 }
5517
5518 // On a floating point condition, the flags are set as follows:
5519 // ZF PF CF op
5520 // 0 | 0 | 0 | X > Y
5521 // 0 | 0 | 1 | X < Y
5522 // 1 | 0 | 0 | X == Y
5523 // 1 | 1 | 1 | unordered
5524 switch (SetCCOpcode) {
5525 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5525)
;
5526 case ISD::SETUEQ:
5527 case ISD::SETEQ: return X86::COND_E;
5528 case ISD::SETOLT: // flipped
5529 case ISD::SETOGT:
5530 case ISD::SETGT: return X86::COND_A;
5531 case ISD::SETOLE: // flipped
5532 case ISD::SETOGE:
5533 case ISD::SETGE: return X86::COND_AE;
5534 case ISD::SETUGT: // flipped
5535 case ISD::SETULT:
5536 case ISD::SETLT: return X86::COND_B;
5537 case ISD::SETUGE: // flipped
5538 case ISD::SETULE:
5539 case ISD::SETLE: return X86::COND_BE;
5540 case ISD::SETONE:
5541 case ISD::SETNE: return X86::COND_NE;
5542 case ISD::SETUO: return X86::COND_P;
5543 case ISD::SETO: return X86::COND_NP;
5544 case ISD::SETOEQ:
5545 case ISD::SETUNE: return X86::COND_INVALID;
5546 }
5547}
5548
5549/// Is there a floating point cmov for the specific X86 condition code?
5550/// Current x86 isa includes the following FP cmov instructions:
5551/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5552static bool hasFPCMov(unsigned X86CC) {
5553 switch (X86CC) {
5554 default:
5555 return false;
5556 case X86::COND_B:
5557 case X86::COND_BE:
5558 case X86::COND_E:
5559 case X86::COND_P:
5560 case X86::COND_A:
5561 case X86::COND_AE:
5562 case X86::COND_NE:
5563 case X86::COND_NP:
5564 return true;
5565 }
5566}
5567
5568static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
5569 return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
5570 VT.is512BitVector();
5571}
5572
5573bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5574 const CallInst &I,
5575 MachineFunction &MF,
5576 unsigned Intrinsic) const {
5577 Info.flags = MachineMemOperand::MONone;
5578 Info.offset = 0;
5579
5580 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5581 if (!IntrData) {
5582 switch (Intrinsic) {
5583 case Intrinsic::x86_aesenc128kl:
5584 case Intrinsic::x86_aesdec128kl:
5585 Info.opc = ISD::INTRINSIC_W_CHAIN;
5586 Info.ptrVal = I.getArgOperand(1);
5587 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5588 Info.align = Align(1);
5589 Info.flags |= MachineMemOperand::MOLoad;
5590 return true;
5591 case Intrinsic::x86_aesenc256kl:
5592 case Intrinsic::x86_aesdec256kl:
5593 Info.opc = ISD::INTRINSIC_W_CHAIN;
5594 Info.ptrVal = I.getArgOperand(1);
5595 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5596 Info.align = Align(1);
5597 Info.flags |= MachineMemOperand::MOLoad;
5598 return true;
5599 case Intrinsic::x86_aesencwide128kl:
5600 case Intrinsic::x86_aesdecwide128kl:
5601 Info.opc = ISD::INTRINSIC_W_CHAIN;
5602 Info.ptrVal = I.getArgOperand(0);
5603 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5604 Info.align = Align(1);
5605 Info.flags |= MachineMemOperand::MOLoad;
5606 return true;
5607 case Intrinsic::x86_aesencwide256kl:
5608 case Intrinsic::x86_aesdecwide256kl:
5609 Info.opc = ISD::INTRINSIC_W_CHAIN;
5610 Info.ptrVal = I.getArgOperand(0);
5611 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5612 Info.align = Align(1);
5613 Info.flags |= MachineMemOperand::MOLoad;
5614 return true;
5615 case Intrinsic::x86_atomic_bts:
5616 case Intrinsic::x86_atomic_btc:
5617 case Intrinsic::x86_atomic_btr: {
5618 Info.opc = ISD::INTRINSIC_W_CHAIN;
5619 Info.ptrVal = I.getArgOperand(0);
5620 unsigned Size = I.getType()->getScalarSizeInBits();
5621 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5622 Info.align = Align(Size);
5623 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5624 MachineMemOperand::MOVolatile;
5625 return true;
5626 }
5627 }
5628 return false;
5629 }
5630
5631 switch (IntrData->Type) {
5632 case TRUNCATE_TO_MEM_VI8:
5633 case TRUNCATE_TO_MEM_VI16:
5634 case TRUNCATE_TO_MEM_VI32: {
5635 Info.opc = ISD::INTRINSIC_VOID;
5636 Info.ptrVal = I.getArgOperand(0);
5637 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5638 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5639 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5640 ScalarVT = MVT::i8;
5641 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5642 ScalarVT = MVT::i16;
5643 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5644 ScalarVT = MVT::i32;
5645
5646 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5647 Info.align = Align(1);
5648 Info.flags |= MachineMemOperand::MOStore;
5649 break;
5650 }
5651 case GATHER:
5652 case GATHER_AVX2: {
5653 Info.opc = ISD::INTRINSIC_W_CHAIN;
5654 Info.ptrVal = nullptr;
5655 MVT DataVT = MVT::getVT(I.getType());
5656 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5657 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5658 IndexVT.getVectorNumElements());
5659 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5660 Info.align = Align(1);
5661 Info.flags |= MachineMemOperand::MOLoad;
5662 break;
5663 }
5664 case SCATTER: {
5665 Info.opc = ISD::INTRINSIC_VOID;
5666 Info.ptrVal = nullptr;
5667 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5668 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5669 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5670 IndexVT.getVectorNumElements());
5671 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5672 Info.align = Align(1);
5673 Info.flags |= MachineMemOperand::MOStore;
5674 break;
5675 }
5676 default:
5677 return false;
5678 }
5679
5680 return true;
5681}
5682
5683/// Returns true if the target can instruction select the
5684/// specified FP immediate natively. If false, the legalizer will
5685/// materialize the FP immediate as a load from a constant pool.
5686bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5687 bool ForCodeSize) const {
5688 for (const APFloat &FPImm : LegalFPImmediates)
5689 if (Imm.bitwiseIsEqual(FPImm))
5690 return true;
5691 return false;
5692}
5693
5694bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5695 ISD::LoadExtType ExtTy,
5696 EVT NewVT) const {
5697 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")(static_cast <bool> (cast<LoadSDNode>(Load)->isSimple
() && "illegal to narrow") ? void (0) : __assert_fail
("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5697, __extension__
__PRETTY_FUNCTION__))
;
5698
5699 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5700 // relocation target a movq or addq instruction: don't let the load shrink.
5701 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5702 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5703 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5704 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5705
5706 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5707 // those uses are extracted directly into a store, then the extract + store
5708 // can be store-folded. Therefore, it's probably not worth splitting the load.
5709 EVT VT = Load->getValueType(0);
5710 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5711 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5712 // Skip uses of the chain value. Result 0 of the node is the load value.
5713 if (UI.getUse().getResNo() != 0)
5714 continue;
5715
5716 // If this use is not an extract + store, it's probably worth splitting.
5717 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5718 UI->use_begin()->getOpcode() != ISD::STORE)
5719 return true;
5720 }
5721 // All non-chain uses are extract + store.
5722 return false;
5723 }
5724
5725 return true;
5726}
5727
5728/// Returns true if it is beneficial to convert a load of a constant
5729/// to just the constant itself.
5730bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5731 Type *Ty) const {
5732 assert(Ty->isIntegerTy())(static_cast <bool> (Ty->isIntegerTy()) ? void (0) :
__assert_fail ("Ty->isIntegerTy()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5732, __extension__ __PRETTY_FUNCTION__))
;
5733
5734 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5735 if (BitSize == 0 || BitSize > 64)
5736 return false;
5737 return true;
5738}
5739
5740bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5741 // If we are using XMM registers in the ABI and the condition of the select is
5742 // a floating-point compare and we have blendv or conditional move, then it is
5743 // cheaper to select instead of doing a cross-register move and creating a
5744 // load that depends on the compare result.
5745 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5746 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5747}
5748
5749bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5750 // TODO: It might be a win to ease or lift this restriction, but the generic
5751 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5752 if (VT.isVector() && Subtarget.hasAVX512())
5753 return false;
5754
5755 return true;
5756}
5757
5758bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5759 SDValue C) const {
5760 // TODO: We handle scalars using custom code, but generic combining could make
5761 // that unnecessary.
5762 APInt MulC;
5763 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5764 return false;
5765
5766 // Find the type this will be legalized too. Otherwise we might prematurely
5767 // convert this to shl+add/sub and then still have to type legalize those ops.
5768 // Another choice would be to defer the decision for illegal types until
5769 // after type legalization. But constant splat vectors of i64 can't make it
5770 // through type legalization on 32-bit targets so we would need to special
5771 // case vXi64.
5772 while (getTypeAction(Context, VT) != TypeLegal)
5773 VT = getTypeToTransformTo(Context, VT);
5774
5775 // If vector multiply is legal, assume that's faster than shl + add/sub.
5776 // Multiply is a complex op with higher latency and lower throughput in
5777 // most implementations, sub-vXi32 vector multiplies are always fast,
5778 // vXi32 mustn't have a SlowMULLD implementation, and anything larger (vXi64)
5779 // is always going to be slow.
5780 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5781 if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&
5782 (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
5783 return false;
5784
5785 // shl+add, shl+sub, shl+add+neg
5786 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5787 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5788}
5789
5790bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5791 unsigned Index) const {
5792 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5793 return false;
5794
5795 // Mask vectors support all subregister combinations and operations that
5796 // extract half of vector.
5797 if (ResVT.getVectorElementType() == MVT::i1)
5798 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5799 (Index == ResVT.getVectorNumElements()));
5800
5801 return (Index % ResVT.getVectorNumElements()) == 0;
5802}
5803
5804bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5805 unsigned Opc = VecOp.getOpcode();
5806
5807 // Assume target opcodes can't be scalarized.
5808 // TODO - do we have any exceptions?
5809 if (Opc >= ISD::BUILTIN_OP_END)
5810 return false;
5811
5812 // If the vector op is not supported, try to convert to scalar.
5813 EVT VecVT = VecOp.getValueType();
5814 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5815 return true;
5816
5817 // If the vector op is supported, but the scalar op is not, the transform may
5818 // not be worthwhile.
5819 EVT ScalarVT = VecVT.getScalarType();
5820 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5821}
5822
5823bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5824 bool) const {
5825 // TODO: Allow vectors?
5826 if (VT.isVector())
5827 return false;
5828 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5829}
5830
5831bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
5832 // Speculate cttz only if we can directly use TZCNT or can promote to i32.
5833 return Subtarget.hasBMI() ||
5834 (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);
5835}
5836
5837bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
5838 // Speculate ctlz only if we can directly use LZCNT.
5839 return Subtarget.hasLZCNT();
5840}
5841
5842bool X86TargetLowering::hasBitPreservingFPLogic(EVT VT) const {
5843 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
5844}
5845
5846bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
5847 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
5848 // expensive than a straight movsd. On the other hand, it's important to
5849 // shrink long double fp constant since fldt is very slow.
5850 return !Subtarget.hasSSE2() || VT == MVT::f80;
5851}
5852
5853bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const {
5854 return (VT == MVT::f64 && Subtarget.hasSSE2()) ||
5855 (VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;
5856}
5857
5858bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5859 const SelectionDAG &DAG,
5860 const MachineMemOperand &MMO) const {
5861 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5862 BitcastVT.getVectorElementType() == MVT::i1)
5863 return false;
5864
5865 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5866 return false;
5867
5868 // If both types are legal vectors, it's always ok to convert them.
5869 if (LoadVT.isVector() && BitcastVT.isVector() &&
5870 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5871 return true;
5872
5873 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5874}
5875
5876bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5877 const MachineFunction &MF) const {
5878 // Do not merge to float value size (128 bytes) if no implicit
5879 // float attribute is set.
5880 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
5881
5882 if (NoFloat) {
5883 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5884 return (MemVT.getSizeInBits() <= MaxIntSize);
5885 }
5886 // Make sure we don't merge greater than our preferred vector
5887 // width.
5888 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5889 return false;
5890
5891 return true;
5892}
5893
5894bool X86TargetLowering::isCtlzFast() const {
5895 return Subtarget.hasFastLZCNT();
5896}
5897
5898bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5899 const Instruction &AndI) const {
5900 return true;
5901}
5902
5903bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5904 EVT VT = Y.getValueType();
5905
5906 if (VT.isVector())
5907 return false;
5908
5909 if (!Subtarget.hasBMI())
5910 return false;
5911
5912 // There are only 32-bit and 64-bit forms for 'andn'.
5913 if (VT != MVT::i32 && VT != MVT::i64)
5914 return false;
5915
5916 return !isa<ConstantSDNode>(Y);
5917}
5918
5919bool X86TargetLowering::hasAndNot(SDValue Y) const {
5920 EVT VT = Y.getValueType();
5921
5922 if (!VT.isVector())
5923 return hasAndNotCompare(Y);
5924
5925 // Vector.
5926
5927 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5928 return false;
5929
5930 if (VT == MVT::v4i32)
5931 return true;
5932
5933 return Subtarget.hasSSE2();
5934