Bug Summary

File: build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 16692, column 21
The result of the '/' expression is undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/X86 -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/X86 -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/X86/X86ISelLowering.cpp
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/IntrinsicLowering.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineJumpTableInfo.h"
39#include "llvm/CodeGen/MachineLoopInfo.h"
40#include "llvm/CodeGen/MachineModuleInfo.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/TargetLowering.h"
43#include "llvm/CodeGen/WinEHFuncInfo.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DerivedTypes.h"
47#include "llvm/IR/DiagnosticInfo.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalVariable.h"
51#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Instructions.h"
53#include "llvm/IR/Intrinsics.h"
54#include "llvm/IR/PatternMatch.h"
55#include "llvm/MC/MCAsmInfo.h"
56#include "llvm/MC/MCContext.h"
57#include "llvm/MC/MCExpr.h"
58#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/KnownBits.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Target/TargetOptions.h"
65#include <algorithm>
66#include <bitset>
67#include <cctype>
68#include <numeric>
69using namespace llvm;
70
71#define DEBUG_TYPE "x86-isel"
72
73STATISTIC(NumTailCalls, "Number of tail calls");
// Macro expansion shown by the report:
//   static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls", "Number of tail calls"};
74
// Hidden integer command-line option "x86-experimental-pref-innermost-loop-alignment".
// Defaults to 4; per its description the value is a log2 byte count that applies
// to innermost loops only and takes precedence over
// x86-experimental-pref-loop-alignment. The code that reads this option is not
// part of this excerpt.
75static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
76 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
77 cl::desc(
78 "Sets the preferable loop alignment for experiments (as log2 bytes) "
79 "for innermost loops only. If specified, this option overrides "
80 "alignment set by x86-experimental-pref-loop-alignment."),
81 cl::Hidden);
82
// Hidden boolean command-line option "mul-constant-optimization", enabled by
// default. Per its description it controls replacing 'mul x, Const' with
// cheaper instruction sequences (SHIFT, LEA, ...). The code that reads this
// option is not part of this excerpt.
83static cl::opt<bool> MulConstantOptimization(
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden);
88
// Hidden boolean command-line option "x86-experimental-unordered-atomic-isel",
// disabled by default. Per its description, enabling it selects
// LoadSDNode/StoreSDNode instead of AtomicSDNode for unordered atomic loads and
// stores. The code that reads this option is not part of this excerpt.
89static cl::opt<bool> ExperimentalUnorderedISEL(
90 "x86-experimental-unordered-atomic-isel", cl::init(false),
91 cl::desc("Use LoadSDNode and StoreSDNode instead of "
92 "AtomicSDNode for unordered atomic loads and "
93 "stores respectively."),
94 cl::Hidden);
95
96/// Call this when the user attempts to do something unsupported, like
97/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
98/// report_fatal_error, so calling code should attempt to recover without
99/// crashing.
///
/// The diagnostic is a DiagnosticInfoUnsupported handed to diagnose() on the
/// context returned by DAG.getContext(); the function returns normally
/// afterwards and produces no value.
///
/// \param DAG Selection DAG whose machine function and context are used to
///            attribute and report the diagnostic.
/// \param dl  Source location; its debug location is attached to the
///            diagnostic.
/// \param Msg Message text passed to the diagnostic.
100static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
101 const char *Msg) {
// The machine function supplies the function the diagnostic is reported against.
102 MachineFunction &MF = DAG.getMachineFunction();
103 DAG.getContext()->diagnose(
104 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
105}
106
107X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
108 const X86Subtarget &STI)
109 : TargetLowering(TM), Subtarget(STI) {
110 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
111 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
112
113 // Set up the TargetLowering object.
114
115 // X86 is weird. It always uses i8 for shift amounts and setcc results.
116 setBooleanContents(ZeroOrOneBooleanContent);
117 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
118 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
119
120 // For 64-bit, since we have so many registers, use the ILP scheduler.
121 // For 32-bit, use the register pressure specific scheduling.
122 // For Atom, always use ILP scheduling.
123 if (Subtarget.isAtom())
124 setSchedulingPreference(Sched::ILP);
125 else if (Subtarget.is64Bit())
126 setSchedulingPreference(Sched::ILP);
127 else
128 setSchedulingPreference(Sched::RegPressure);
129 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
130 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
131
132 // Bypass expensive divides and use cheaper ones.
133 if (TM.getOptLevel() >= CodeGenOpt::Default) {
134 if (Subtarget.hasSlowDivide32())
135 addBypassSlowDiv(32, 8);
136 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
137 addBypassSlowDiv(64, 32);
138 }
139
140 // Setup Windows compiler runtime calls.
141 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
142 static const struct {
143 const RTLIB::Libcall Op;
144 const char * const Name;
145 const CallingConv::ID CC;
146 } LibraryCalls[] = {
147 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
148 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
149 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
150 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
151 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
152 };
153
154 for (const auto &LC : LibraryCalls) {
155 setLibcallName(LC.Op, LC.Name);
156 setLibcallCallingConv(LC.Op, LC.CC);
157 }
158 }
159
160 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
161 // MSVCRT doesn't have powi; fall back to pow
162 setLibcallName(RTLIB::POWI_F32, nullptr);
163 setLibcallName(RTLIB::POWI_F64, nullptr);
164 }
165
166 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
167 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
168 // FIXME: Should we be limiting the atomic size on other configs? Default is
169 // 1024.
170 if (!Subtarget.canUseCMPXCHG8B())
171 setMaxAtomicSizeInBitsSupported(32);
172
173 // Set up the register classes.
174 addRegisterClass(MVT::i8, &X86::GR8RegClass);
175 addRegisterClass(MVT::i16, &X86::GR16RegClass);
176 addRegisterClass(MVT::i32, &X86::GR32RegClass);
177 if (Subtarget.is64Bit())
178 addRegisterClass(MVT::i64, &X86::GR64RegClass);
179
180 for (MVT VT : MVT::integer_valuetypes())
181 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
182
183 // We don't accept any truncstore of integer registers.
184 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
185 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
186 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
187 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
188 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
189 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
190
191 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
192
193 // SETOEQ and SETUNE require checking two conditions.
194 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
195 setCondCodeAction(ISD::SETOEQ, VT, Expand);
196 setCondCodeAction(ISD::SETUNE, VT, Expand);
197 }
198
199 // Integer absolute.
200 if (Subtarget.canUseCMOV()) {
201 setOperationAction(ISD::ABS , MVT::i16 , Custom);
202 setOperationAction(ISD::ABS , MVT::i32 , Custom);
203 if (Subtarget.is64Bit())
204 setOperationAction(ISD::ABS , MVT::i64 , Custom);
205 }
206
207 // Signed saturation subtraction.
208 setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
209 setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
210 setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
211 if (Subtarget.is64Bit())
212 setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
213
214 // Funnel shifts.
215 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
216 // For slow shld targets we only lower for code size.
217 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
218
219 setOperationAction(ShiftOp , MVT::i8 , Custom);
220 setOperationAction(ShiftOp , MVT::i16 , Custom);
221 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
222 if (Subtarget.is64Bit())
223 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
224 }
225
226 if (!Subtarget.useSoftFloat()) {
227 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
228 // operation.
229 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
230 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
231 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
232 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
233 // We have an algorithm for SSE2, and we turn this into a 64-bit
234 // FILD or VCVTUSI2SS/SD for other targets.
235 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
236 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
237 // We have an algorithm for SSE2->double, and we turn this into a
238 // 64-bit FILD followed by conditional FADD for other targets.
239 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
240 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
241
242 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
243 // this operation.
244 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
245 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
246 // SSE has no i16 to fp conversion, only i32. We promote in the handler
247 // to allow f80 to use i16 and f64 to use i16 with sse1 only
248 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
249 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
250 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
251 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
252 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
253 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
254 // are Legal, f80 is custom lowered.
255 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
256 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
257
258 // Promote i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
259 // this operation.
260 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
261 // FIXME: This doesn't generate invalid exception when it should. PR44019.
262 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
263 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
264 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
265 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
266 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
267 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
268 // are Legal, f80 is custom lowered.
269 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
270 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
271
272 // Handle FP_TO_UINT by promoting the destination to a larger signed
273 // conversion.
274 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
275 // FIXME: This doesn't generate invalid exception when it should. PR44019.
276 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
277 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
278 // FIXME: This doesn't generate invalid exception when it should. PR44019.
279 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
280 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
281 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
282 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
283 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
284
285 setOperationAction(ISD::LRINT, MVT::f32, Custom);
286 setOperationAction(ISD::LRINT, MVT::f64, Custom);
287 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
288 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
289
290 if (!Subtarget.is64Bit()) {
291 setOperationAction(ISD::LRINT, MVT::i64, Custom);
292 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
293 }
294 }
295
296 if (Subtarget.hasSSE2()) {
297 // Custom lowering for saturating float to int conversions.
298 // We handle promotion to larger result types manually.
299 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
300 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
301 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
302 }
303 if (Subtarget.is64Bit()) {
304 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
305 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
306 }
307 }
308
309 // Handle address space casts between mixed sized pointers.
310 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
311 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
312
313 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
314 if (!Subtarget.hasSSE2()) {
315 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
316 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
317 if (Subtarget.is64Bit()) {
318 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
319 // Without SSE, i64->f64 goes through memory.
320 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
321 }
322 } else if (!Subtarget.is64Bit())
323 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
324
325 // Scalar integer divide and remainder are lowered to use operations that
326 // produce two results, to match the available instructions. This exposes
327 // the two-result form to trivial CSE, which is able to combine x/y and x%y
328 // into a single instruction.
329 //
330 // Scalar integer multiply-high is also lowered to use two-result
331 // operations, to match the available instructions. However, plain multiply
332 // (low) operations are left as Legal, as there are single-result
333 // instructions for this in x86. Using the two-result multiply instructions
334 // when both high and low results are needed must be arranged by dagcombine.
335 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
336 setOperationAction(ISD::MULHS, VT, Expand);
337 setOperationAction(ISD::MULHU, VT, Expand);
338 setOperationAction(ISD::SDIV, VT, Expand);
339 setOperationAction(ISD::UDIV, VT, Expand);
340 setOperationAction(ISD::SREM, VT, Expand);
341 setOperationAction(ISD::UREM, VT, Expand);
342 }
343
344 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
345 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
346 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
347 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
348 setOperationAction(ISD::BR_CC, VT, Expand);
349 setOperationAction(ISD::SELECT_CC, VT, Expand);
350 }
351 if (Subtarget.is64Bit())
352 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
353 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
354 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
355 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
356
357 setOperationAction(ISD::FREM , MVT::f32 , Expand);
358 setOperationAction(ISD::FREM , MVT::f64 , Expand);
359 setOperationAction(ISD::FREM , MVT::f80 , Expand);
360 setOperationAction(ISD::FREM , MVT::f128 , Expand);
361
362 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
363 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
364 setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
365 }
366
367 // Promote the i8 variants and force them on up to i32 which has a shorter
368 // encoding.
369 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
370 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
371
372 if (Subtarget.hasBMI()) {
373 // Promote the i16 zero undef variant and force it on up to i32 when tzcnt
374 // is enabled.
375 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32);
376 } else {
377 setOperationAction(ISD::CTTZ, MVT::i16, Custom);
378 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
379 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
380 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
381 if (Subtarget.is64Bit()) {
382 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
383 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
384 }
385 }
386
387 if (Subtarget.hasLZCNT()) {
388 // When promoting the i8 variants, force them to i32 for a shorter
389 // encoding.
390 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
391 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
392 } else {
393 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
394 if (VT == MVT::i64 && !Subtarget.is64Bit())
395 continue;
396 setOperationAction(ISD::CTLZ , VT, Custom);
397 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
398 }
399 }
400
401 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
402 ISD::STRICT_FP_TO_FP16}) {
403 // Special handling for half-precision floating point conversions.
404 // If we don't have F16C support, then lower half float conversions
405 // into library calls.
406 setOperationAction(
407 Op, MVT::f32,
408 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
409 // There's never any support for operations beyond MVT::f32.
410 setOperationAction(Op, MVT::f64, Expand);
411 setOperationAction(Op, MVT::f80, Expand);
412 setOperationAction(Op, MVT::f128, Expand);
413 }
414
415 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
416 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
417 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
418 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
419 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
420 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
421 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
422 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
423
424 setOperationAction(ISD::PARITY, MVT::i8, Custom);
425 setOperationAction(ISD::PARITY, MVT::i16, Custom);
426 setOperationAction(ISD::PARITY, MVT::i32, Custom);
427 if (Subtarget.is64Bit())
428 setOperationAction(ISD::PARITY, MVT::i64, Custom);
429 if (Subtarget.hasPOPCNT()) {
430 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
431 // popcntw is longer to encode than popcntl and also has a false dependency
432 // on the dest that popcntl hasn't had since Cannon Lake.
433 setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
434 } else {
435 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
436 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
437 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
438 if (Subtarget.is64Bit())
439 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
440 else
441 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
442 }
443
444 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
445
446 if (!Subtarget.hasMOVBE())
447 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
448
449 // X86 wants to expand cmov itself.
450 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
451 setOperationAction(ISD::SELECT, VT, Custom);
452 setOperationAction(ISD::SETCC, VT, Custom);
453 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
454 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
455 }
456 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
457 if (VT == MVT::i64 && !Subtarget.is64Bit())
458 continue;
459 setOperationAction(ISD::SELECT, VT, Custom);
460 setOperationAction(ISD::SETCC, VT, Custom);
461 }
462
463 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
464 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
465 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
466
467 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
468 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
469 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
470 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
471 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
472 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
473 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
474 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
475
476 // Darwin ABI issue.
477 for (auto VT : { MVT::i32, MVT::i64 }) {
478 if (VT == MVT::i64 && !Subtarget.is64Bit())
479 continue;
480 setOperationAction(ISD::ConstantPool , VT, Custom);
481 setOperationAction(ISD::JumpTable , VT, Custom);
482 setOperationAction(ISD::GlobalAddress , VT, Custom);
483 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
484 setOperationAction(ISD::ExternalSymbol , VT, Custom);
485 setOperationAction(ISD::BlockAddress , VT, Custom);
486 }
487
488 // 64-bit shl, sra, srl (iff 32-bit x86)
489 for (auto VT : { MVT::i32, MVT::i64 }) {
490 if (VT == MVT::i64 && !Subtarget.is64Bit())
491 continue;
492 setOperationAction(ISD::SHL_PARTS, VT, Custom);
493 setOperationAction(ISD::SRA_PARTS, VT, Custom);
494 setOperationAction(ISD::SRL_PARTS, VT, Custom);
495 }
496
497 if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
498 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
499
500 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
501
502 // Expand certain atomics
503 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
504 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
505 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
506 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
507 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
508 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
509 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
510 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
511 }
512
513 if (!Subtarget.is64Bit())
514 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
515
516 if (Subtarget.canUseCMPXCHG16B())
517 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
518
519 // FIXME - use subtarget debug flags
520 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
521 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
522 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
523 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
524 }
525
526 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
527 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
528
529 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
530 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
531
532 setOperationAction(ISD::TRAP, MVT::Other, Legal);
533 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
534 if (Subtarget.getTargetTriple().isPS4())
535 setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
536 else
537 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
538
539 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
540 setOperationAction(ISD::VASTART , MVT::Other, Custom);
541 setOperationAction(ISD::VAEND , MVT::Other, Expand);
542 bool Is64Bit = Subtarget.is64Bit();
543 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
544 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
545
546 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
547 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
548
549 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
550
551 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
552 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
553 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
554
555 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
556 // f32 and f64 use SSE.
557 // Set up the FP register classes.
558 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
559 : &X86::FR32RegClass);
560 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
561 : &X86::FR64RegClass);
562
563 // Disable f32->f64 extload as we can only generate this in one instruction
564 // under optsize. So it's easier to pattern match (fpext (load)) for that
565 // case instead of needing to emit 2 instructions for extload in the
566 // non-optsize case.
567 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
568
569 for (auto VT : { MVT::f32, MVT::f64 }) {
570 // Use ANDPD to simulate FABS.
571 setOperationAction(ISD::FABS, VT, Custom);
572
573 // Use XORP to simulate FNEG.
574 setOperationAction(ISD::FNEG, VT, Custom);
575
576 // Use ANDPD and ORPD to simulate FCOPYSIGN.
577 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
578
579 // These might be better off as horizontal vector ops.
580 setOperationAction(ISD::FADD, VT, Custom);
581 setOperationAction(ISD::FSUB, VT, Custom);
582
583 // We don't support sin/cos/fmod
584 setOperationAction(ISD::FSIN , VT, Expand);
585 setOperationAction(ISD::FCOS , VT, Expand);
586 setOperationAction(ISD::FSINCOS, VT, Expand);
587 }
588
589 // Lower this to MOVMSK plus an AND.
590 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
591 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
592
593 } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
594 (UseX87 || Is64Bit)) {
595 // Use SSE for f32, x87 for f64.
596 // Set up the FP register classes.
597 addRegisterClass(MVT::f32, &X86::FR32RegClass);
598 if (UseX87)
599 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
600
601 // Use ANDPS to simulate FABS.
602 setOperationAction(ISD::FABS , MVT::f32, Custom);
603
604 // Use XORP to simulate FNEG.
605 setOperationAction(ISD::FNEG , MVT::f32, Custom);
606
607 if (UseX87)
608 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
609
610 // Use ANDPS and ORPS to simulate FCOPYSIGN.
611 if (UseX87)
612 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
613 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
614
615 // We don't support sin/cos/fmod
616 setOperationAction(ISD::FSIN , MVT::f32, Expand);
617 setOperationAction(ISD::FCOS , MVT::f32, Expand);
618 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
619
620 if (UseX87) {
621 // Always expand sin/cos functions even though x87 has an instruction.
622 setOperationAction(ISD::FSIN, MVT::f64, Expand);
623 setOperationAction(ISD::FCOS, MVT::f64, Expand);
624 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
625 }
626 } else if (UseX87) {
627 // f32 and f64 in x87.
628 // Set up the FP register classes.
629 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
630 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
631
632 for (auto VT : { MVT::f32, MVT::f64 }) {
633 setOperationAction(ISD::UNDEF, VT, Expand);
634 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
635
636 // Always expand sin/cos functions even though x87 has an instruction.
637 setOperationAction(ISD::FSIN , VT, Expand);
638 setOperationAction(ISD::FCOS , VT, Expand);
639 setOperationAction(ISD::FSINCOS, VT, Expand);
640 }
641 }
642
643 // Expand FP32 immediates into loads from the stack, save special cases.
644 if (isTypeLegal(MVT::f32)) {
645 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
646 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
647 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
648 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
649 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
650 } else // SSE immediates.
651 addLegalFPImmediate(APFloat(+0.0f)); // xorps
652 }
653 // Expand FP64 immediates into loads from the stack, save special cases.
654 if (isTypeLegal(MVT::f64)) {
655 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
656 addLegalFPImmediate(APFloat(+0.0)); // FLD0
657 addLegalFPImmediate(APFloat(+1.0)); // FLD1
658 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
659 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
660 } else // SSE immediates.
661 addLegalFPImmediate(APFloat(+0.0)); // xorpd
662 }
663 // Handle constrained floating-point operations of scalar.
664 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
665 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
666 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
667 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
668 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
669 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
670 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
671 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
672 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
673 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
674 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
675 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
676 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
677
678 // We don't support FMA.
679 setOperationAction(ISD::FMA, MVT::f64, Expand);
680 setOperationAction(ISD::FMA, MVT::f32, Expand);
681
682 // f80 always uses X87.
683 if (UseX87) {
684 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
685 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
686 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
687 {
688 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
689 addLegalFPImmediate(TmpFlt); // FLD0
690 TmpFlt.changeSign();
691 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
692
693 bool ignored;
694 APFloat TmpFlt2(+1.0);
695 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
696 &ignored);
697 addLegalFPImmediate(TmpFlt2); // FLD1
698 TmpFlt2.changeSign();
699 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
700 }
701
702 // Always expand sin/cos functions even though x87 has an instruction.
703 setOperationAction(ISD::FSIN , MVT::f80, Expand);
704 setOperationAction(ISD::FCOS , MVT::f80, Expand);
705 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
706
707 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
708 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
709 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
710 setOperationAction(ISD::FRINT, MVT::f80, Expand);
711 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
712 setOperationAction(ISD::FMA, MVT::f80, Expand);
713 setOperationAction(ISD::LROUND, MVT::f80, Expand);
714 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
715 setOperationAction(ISD::LRINT, MVT::f80, Custom);
716 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
717
718 // Handle constrained floating-point operations of scalar.
719 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
720 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
721 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
722 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
723 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
724 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
725 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
726 // as Custom.
727 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
728 }
729
730 // f128 uses xmm registers, but most operations require libcalls.
731 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
732 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
733 : &X86::VR128RegClass);
734
735 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
736
737 setOperationAction(ISD::FADD, MVT::f128, LibCall);
738 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
739 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
740 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
741 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
742 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
743 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
744 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
745 setOperationAction(ISD::FMA, MVT::f128, LibCall);
746 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
747
748 setOperationAction(ISD::FABS, MVT::f128, Custom);
749 setOperationAction(ISD::FNEG, MVT::f128, Custom);
750 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
751
752 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
753 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
754 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
755 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
756 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
757 // No STRICT_FSINCOS
758 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
759 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
760
761 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
762 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
763 // We need to custom handle any FP_ROUND with an f128 input, but
764 // LegalizeDAG uses the result type to know when to run a custom handler.
765 // So we have to list all legal floating point result types here.
766 if (isTypeLegal(MVT::f32)) {
767 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
768 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
769 }
770 if (isTypeLegal(MVT::f64)) {
771 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
772 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
773 }
774 if (isTypeLegal(MVT::f80)) {
775 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
776 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
777 }
778
779 setOperationAction(ISD::SETCC, MVT::f128, Custom);
780
781 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
782 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
783 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
784 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
785 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
786 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
787 }
788
789 // Always use a library call for pow.
790 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
791 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
792 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
793 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
794
795 setOperationAction(ISD::FLOG, MVT::f80, Expand);
796 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
797 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
798 setOperationAction(ISD::FEXP, MVT::f80, Expand);
799 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
800 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
801 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
802
803 // Some FP actions are always expanded for vector types.
804 for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
805 MVT::v4f32, MVT::v8f32, MVT::v16f32,
806 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
807 setOperationAction(ISD::FSIN, VT, Expand);
808 setOperationAction(ISD::FSINCOS, VT, Expand);
809 setOperationAction(ISD::FCOS, VT, Expand);
810 setOperationAction(ISD::FREM, VT, Expand);
811 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
812 setOperationAction(ISD::FPOW, VT, Expand);
813 setOperationAction(ISD::FLOG, VT, Expand);
814 setOperationAction(ISD::FLOG2, VT, Expand);
815 setOperationAction(ISD::FLOG10, VT, Expand);
816 setOperationAction(ISD::FEXP, VT, Expand);
817 setOperationAction(ISD::FEXP2, VT, Expand);
818 }
819
820 // First set operation action for all vector types to either promote
821 // (for widening) or expand (for scalarization). Then we will selectively
822 // turn on ones that can be effectively codegen'd.
823 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
824 setOperationAction(ISD::SDIV, VT, Expand);
825 setOperationAction(ISD::UDIV, VT, Expand);
826 setOperationAction(ISD::SREM, VT, Expand);
827 setOperationAction(ISD::UREM, VT, Expand);
828 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
829 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
830 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
831 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
832 setOperationAction(ISD::FMA, VT, Expand);
833 setOperationAction(ISD::FFLOOR, VT, Expand);
834 setOperationAction(ISD::FCEIL, VT, Expand);
835 setOperationAction(ISD::FTRUNC, VT, Expand);
836 setOperationAction(ISD::FRINT, VT, Expand);
837 setOperationAction(ISD::FNEARBYINT, VT, Expand);
838 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
839 setOperationAction(ISD::MULHS, VT, Expand);
840 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
841 setOperationAction(ISD::MULHU, VT, Expand);
842 setOperationAction(ISD::SDIVREM, VT, Expand);
843 setOperationAction(ISD::UDIVREM, VT, Expand);
844 setOperationAction(ISD::CTPOP, VT, Expand);
845 setOperationAction(ISD::CTTZ, VT, Expand);
846 setOperationAction(ISD::CTLZ, VT, Expand);
847 setOperationAction(ISD::ROTL, VT, Expand);
848 setOperationAction(ISD::ROTR, VT, Expand);
849 setOperationAction(ISD::BSWAP, VT, Expand);
850 setOperationAction(ISD::SETCC, VT, Expand);
851 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
852 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
853 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
854 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
855 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
856 setOperationAction(ISD::TRUNCATE, VT, Expand);
857 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
858 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
859 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
860 setOperationAction(ISD::SELECT_CC, VT, Expand);
861 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
862 setTruncStoreAction(InnerVT, VT, Expand);
863
864 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
865 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
866
867 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
868 // types, we have to deal with them whether we ask for Expansion or not.
869 // Setting Expand causes its own optimisation problems though, so leave
870 // them legal.
871 if (VT.getVectorElementType() == MVT::i1)
872 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
873
874 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
875 // split/scalarized right now.
876 if (VT.getVectorElementType() == MVT::f16)
877 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
878 }
879 }
880
881 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
882 // with -msoft-float, disable use of MMX as well.
883 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
884 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
885 // No operations on x86mmx supported, everything uses intrinsics.
886 }
887
888 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
889 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
890 : &X86::VR128RegClass);
891
892 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
893 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
894 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
895 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
896 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
897 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
898 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
899 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
900
901 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
902 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
903
904 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
905 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
906 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
907 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
908 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
909 }
910
911 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
912 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
913 : &X86::VR128RegClass);
914
915 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
916 // registers cannot be used even for integer operations.
917 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
918 : &X86::VR128RegClass);
919 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
920 : &X86::VR128RegClass);
921 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
922 : &X86::VR128RegClass);
923 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
924 : &X86::VR128RegClass);
925
926 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
927 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
928 setOperationAction(ISD::SDIV, VT, Custom);
929 setOperationAction(ISD::SREM, VT, Custom);
930 setOperationAction(ISD::UDIV, VT, Custom);
931 setOperationAction(ISD::UREM, VT, Custom);
932 }
933
934 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
935 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
936 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
937
938 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
939 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
940 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
941 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
942 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
943 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
944 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
945 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
946 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
947 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
948 setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
949 setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
950
951 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
952 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
953
954 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
955 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
956 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
957
958 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
959 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
960 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
961 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
962 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
963 }
964
965 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
966 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
967 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
968 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
969 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
970 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
971 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
972 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
973 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
974 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
975
976 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
977 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
978 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
979 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
980
981 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
982 setOperationAction(ISD::SETCC, VT, Custom);
983 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
984 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
985 setOperationAction(ISD::CTPOP, VT, Custom);
986 setOperationAction(ISD::ABS, VT, Custom);
987
988 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
989 // setcc all the way to isel and prefer SETGT in some isel patterns.
990 setCondCodeAction(ISD::SETLT, VT, Custom);
991 setCondCodeAction(ISD::SETLE, VT, Custom);
992 }
993
994 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
995 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
996 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
997 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
998 setOperationAction(ISD::VSELECT, VT, Custom);
999 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1000 }
1001
1002 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
1003 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1004 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1005 setOperationAction(ISD::VSELECT, VT, Custom);
1006
1007 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1008 continue;
1009
1010 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1011 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1012 }
1013
1014 // Custom lower v2i64 and v2f64 selects.
1015 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1016 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1017 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
1018 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
1019 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
1020
1021 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1022 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
1023 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1024 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1025 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
1026 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1027
1028 // Custom legalize these to avoid over promotion or custom promotion.
1029 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1030 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1031 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1032 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1033 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1034 }
1035
1036 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1037 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
1038 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1039 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1040
1041 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1042 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1043
1044 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1045 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1046
1047 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1048 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1049 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1050 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1051 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1052
1053 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1054 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1055 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1056 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1057
1058 // We want to legalize this to an f64 load rather than an i64 load on
1059 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1060 // store.
1061 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1062 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1063 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1064 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1065 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1066 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1067
1068 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1069 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1070 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1071 if (!Subtarget.hasAVX512())
1072 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1073
1074 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1075 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1076 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1077
1078 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1079
1080 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1081 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1082 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1083 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1084 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1085 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1086
1087 // In the customized shift lowering, the legal v4i32/v2i64 cases
1088 // in AVX2 will be recognized.
1089 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1090 setOperationAction(ISD::SRL, VT, Custom);
1091 setOperationAction(ISD::SHL, VT, Custom);
1092 setOperationAction(ISD::SRA, VT, Custom);
1093 if (VT == MVT::v2i64) continue;
1094 setOperationAction(ISD::ROTL, VT, Custom);
1095 setOperationAction(ISD::ROTR, VT, Custom);
1096 setOperationAction(ISD::FSHL, VT, Custom);
1097 setOperationAction(ISD::FSHR, VT, Custom);
1098 }
1099
1100 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1101 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1102 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1103 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1104 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1105 }
1106
1107 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1108 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1109 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1110 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1111 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1112 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1113 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1114 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1115 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1116
1117 // These might be better off as horizontal vector ops.
1118 setOperationAction(ISD::ADD, MVT::i16, Custom);
1119 setOperationAction(ISD::ADD, MVT::i32, Custom);
1120 setOperationAction(ISD::SUB, MVT::i16, Custom);
1121 setOperationAction(ISD::SUB, MVT::i32, Custom);
1122 }
1123
1124 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1125 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1126 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1127 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1128 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1129 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1130 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1131 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1132 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1133 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1134 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1135 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1136 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1137 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1138
1139 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1140 }
1141
1142 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1143 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1144 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1145 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1146 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1147 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1148 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1149 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1150
1151 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1152 setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
1153 setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
1154
1155 // FIXME: Do we need to handle scalar-to-vector here?
1156 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1157
1158 // We directly match byte blends in the backend as they match the VSELECT
1159 // condition form.
1160 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1161
1162 // SSE41 brings specific instructions for doing vector sign extend even in
1163 // cases where we don't have SRA.
1164 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1165 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1166 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1167 }
1168
1169 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1170 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1171 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1172 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1173 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1174 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1175 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1176 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1177 }
1178
1179 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1180 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1181 // do the pre and post work in the vector domain.
1182 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1183 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1184 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1185 // so that DAG combine doesn't try to turn it into uint_to_fp.
1186 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1187 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1188 }
1189 }
1190
1191 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1192 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1193 }
1194
1195 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1196 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1197 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1198 setOperationAction(ISD::ROTL, VT, Custom);
1199 setOperationAction(ISD::ROTR, VT, Custom);
1200 }
1201
1202 // XOP can efficiently perform BITREVERSE with VPPERM.
1203 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1204 setOperationAction(ISD::BITREVERSE, VT, Custom);
1205
1206 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1207 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1208 setOperationAction(ISD::BITREVERSE, VT, Custom);
1209 }
1210
1211 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1212 bool HasInt256 = Subtarget.hasInt256();
1213
1214 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1215 : &X86::VR256RegClass);
1216 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1217 : &X86::VR256RegClass);
1218 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1219 : &X86::VR256RegClass);
1220 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1221 : &X86::VR256RegClass);
1222 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1223 : &X86::VR256RegClass);
1224 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1225 : &X86::VR256RegClass);
1226
1227 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1228 setOperationAction(ISD::FFLOOR, VT, Legal);
1229 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1230 setOperationAction(ISD::FCEIL, VT, Legal);
1231 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1232 setOperationAction(ISD::FTRUNC, VT, Legal);
1233 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1234 setOperationAction(ISD::FRINT, VT, Legal);
1235 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1236 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1237 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1238 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1239 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1240
1241 setOperationAction(ISD::FROUND, VT, Custom);
1242
1243 setOperationAction(ISD::FNEG, VT, Custom);
1244 setOperationAction(ISD::FABS, VT, Custom);
1245 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1246 }
1247
1248 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1249 // even though v8i16 is a legal type.
1250 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1251 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1252 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1253 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1254 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1255 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
1256 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
1257
1258 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1259 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1260
1261 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1262 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1263 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1264 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1265 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1266 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1267 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1268 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1269 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1270 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1271 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1272 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1273
1274 if (!Subtarget.hasAVX512())
1275 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1276
1277 // In the customized shift lowering, the legal v8i32/v4i64 cases
1278 // in AVX2 will be recognized.
1279 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1280 setOperationAction(ISD::SRL, VT, Custom);
1281 setOperationAction(ISD::SHL, VT, Custom);
1282 setOperationAction(ISD::SRA, VT, Custom);
1283 if (VT == MVT::v4i64) continue;
1284 setOperationAction(ISD::ROTL, VT, Custom);
1285 setOperationAction(ISD::ROTR, VT, Custom);
1286 setOperationAction(ISD::FSHL, VT, Custom);
1287 setOperationAction(ISD::FSHR, VT, Custom);
1288 }
1289
1290 // These types need custom splitting if their input is a 128-bit vector.
1291 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1292 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1293 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1294 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1295
1296 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1297 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1298 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1299 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1300 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1301 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1302
1303 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1304 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1305 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1306 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1307 }
1308
1309 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1310 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1311 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1312 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1313
1314 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1315 setOperationAction(ISD::SETCC, VT, Custom);
1316 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1317 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1318 setOperationAction(ISD::CTPOP, VT, Custom);
1319 setOperationAction(ISD::CTLZ, VT, Custom);
1320
1321 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1322 // setcc all the way to isel and prefer SETGT in some isel patterns.
1323 setCondCodeAction(ISD::SETLT, VT, Custom);
1324 setCondCodeAction(ISD::SETLE, VT, Custom);
1325 }
1326
1327 if (Subtarget.hasAnyFMA()) {
1328 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1329 MVT::v2f64, MVT::v4f64 }) {
1330 setOperationAction(ISD::FMA, VT, Legal);
1331 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1332 }
1333 }
1334
1335 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1336 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1337 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1338 }
1339
1340 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1341 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1342 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1343 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1344
1345 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1346 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1347 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1348 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1349 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1350 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1351 setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
1352 setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
1353
1354 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1355 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1356
1357 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1358 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1359 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1360 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1361 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1362
1363 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1364 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1365 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1366 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1367 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1368 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1369 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1370 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1371 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1372 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1373 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1374 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1375
1376 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1377 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1378 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1379 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1380 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1381 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1382 }
1383
1384 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1385 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1386 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1387 }
1388
1389 if (HasInt256) {
1390 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1391 // when we have a 256bit-wide blend with immediate.
1392 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1393 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1394
1395 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1396 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1397 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1398 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1399 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1400 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1401 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1402 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1403 }
1404 }
1405
1406 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1407 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1408 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1409 setOperationAction(ISD::MSTORE, VT, Legal);
1410 }
1411
1412 // Extract subvector is special because the value type
1413 // (result) is 128-bit but the source is 256-bit wide.
1414 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1415 MVT::v4f32, MVT::v2f64 }) {
1416 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1417 }
1418
1419 // Custom lower several nodes for 256-bit types.
1420 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1421 MVT::v8f32, MVT::v4f64 }) {
1422 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1423 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1424 setOperationAction(ISD::VSELECT, VT, Custom);
1425 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1426 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1427 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1428 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1429 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1430 setOperationAction(ISD::STORE, VT, Custom);
1431 }
1432
1433 if (HasInt256) {
1434 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1435
1436 // Custom legalize 2x32 to get a little better code.
1437 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1438 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1439
1440 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1441 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1442 setOperationAction(ISD::MGATHER, VT, Custom);
1443 }
1444 }
1445
1446 // This block controls legalization of the mask vector sizes that are
1447 // available with AVX512. 512-bit vectors are in a separate block controlled
1448 // by useAVX512Regs.
1449 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1450 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1451 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1452 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1453 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1454 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1455
1456 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1457 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1458 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1459
1460 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1461 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1462 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1463 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1464 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1465 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1466 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1467 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1468 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1469 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1470 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1471 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1472
1473 // There is no byte sized k-register load or store without AVX512DQ.
1474 if (!Subtarget.hasDQI()) {
1475 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1476 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1477 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1478 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1479
1480 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1481 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1482 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1483 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1484 }
1485
1486 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1487 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1488 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1489 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1490 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1491 }
1492
1493 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1494 setOperationAction(ISD::VSELECT, VT, Expand);
1495
1496 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1497 setOperationAction(ISD::SETCC, VT, Custom);
1498 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1499 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1500 setOperationAction(ISD::SELECT, VT, Custom);
1501 setOperationAction(ISD::TRUNCATE, VT, Custom);
1502
1503 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1504 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1505 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1506 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1507 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1508 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1509 }
1510
1511 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1512 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1513 }
1514
1515 // This block controls legalization for 512-bit operations with 32/64 bit
1516 // elements. 512-bits can be disabled based on prefer-vector-width and
1517 // required-vector-width function attributes.
1518 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1519 bool HasBWI = Subtarget.hasBWI();
1520
1521 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1522 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1523 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1524 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1525 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1526 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1527
1528 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1529 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1530 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1531 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1532 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1533 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1534 if (HasBWI)
1535 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1536 }
1537
1538 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1539 setOperationAction(ISD::FNEG, VT, Custom);
1540 setOperationAction(ISD::FABS, VT, Custom);
1541 setOperationAction(ISD::FMA, VT, Legal);
1542 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1543 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1544 }
1545
1546 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1547 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1548 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1549 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1550 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1551 }
1552 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1553 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1554 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1555 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1556 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1557 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1558 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1559 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1560
1561 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1562 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1563 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1564 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1565 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1566 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1567 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1568 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1569 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1570 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1571 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1572 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1573
1574 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1575 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1576 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1577 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1578 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1579 if (HasBWI)
1580 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1581
1582 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1583 // to 512-bit rather than use the AVX2 instructions so that we can use
1584 // k-masks.
1585 if (!Subtarget.hasVLX()) {
1586 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1587 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1588 setOperationAction(ISD::MLOAD, VT, Custom);
1589 setOperationAction(ISD::MSTORE, VT, Custom);
1590 }
1591 }
1592
1593 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1594 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1595 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1596 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1597 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1598 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1599 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1600 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1601 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1602 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1603 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1604 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1605 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1606
1607 if (HasBWI) {
1608 // Extends from v64i1 masks to 512-bit vectors.
1609 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1610 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1611 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1612 }
1613
1614 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1615 setOperationAction(ISD::FFLOOR, VT, Legal);
1616 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1617 setOperationAction(ISD::FCEIL, VT, Legal);
1618 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1619 setOperationAction(ISD::FTRUNC, VT, Legal);
1620 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1621 setOperationAction(ISD::FRINT, VT, Legal);
1622 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1623 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1624 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1625 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1626 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1627
1628 setOperationAction(ISD::FROUND, VT, Custom);
1629 }
1630
1631 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1632 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1633 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1634 }
1635
1636 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1637 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1638 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1639 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1640
1641 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1642 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1643 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1644 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1645
1646 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1647 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1648 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1649 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1650 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1651 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1652 setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
1653 setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
1654
1655 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1656 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1657
1658 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1659
1660 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1661 setOperationAction(ISD::SRL, VT, Custom);
1662 setOperationAction(ISD::SHL, VT, Custom);
1663 setOperationAction(ISD::SRA, VT, Custom);
1664 setOperationAction(ISD::ROTL, VT, Custom);
1665 setOperationAction(ISD::ROTR, VT, Custom);
1666 setOperationAction(ISD::SETCC, VT, Custom);
1667
1668 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1669 // setcc all the way to isel and prefer SETGT in some isel patterns.
1670 setCondCodeAction(ISD::SETLT, VT, Custom);
1671 setCondCodeAction(ISD::SETLE, VT, Custom);
1672 }
1673 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1674 setOperationAction(ISD::SMAX, VT, Legal);
1675 setOperationAction(ISD::UMAX, VT, Legal);
1676 setOperationAction(ISD::SMIN, VT, Legal);
1677 setOperationAction(ISD::UMIN, VT, Legal);
1678 setOperationAction(ISD::ABS, VT, Legal);
1679 setOperationAction(ISD::CTPOP, VT, Custom);
1680 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1681 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1682 }
1683
1684 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1685 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1686 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1687 setOperationAction(ISD::CTLZ, VT, Custom);
1688 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1689 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1690 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1691 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1692 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1693 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1694 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1695 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1696 }
1697
1698 setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
1699 setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
1700 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1701 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1702 setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
1703 setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
1704
1705 if (Subtarget.hasDQI()) {
1706 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1707 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1708 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1709 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1710 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1711 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1712 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1713 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1714
1715 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1716 }
1717
1718 if (Subtarget.hasCDI()) {
1719 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1720 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1721 setOperationAction(ISD::CTLZ, VT, Legal);
1722 }
1723 } // Subtarget.hasCDI()
1724
1725 if (Subtarget.hasVPOPCNTDQ()) {
1726 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1727 setOperationAction(ISD::CTPOP, VT, Legal);
1728 }
1729
1730 // Extract subvector is special because the value type
1731 // (result) is 256-bit but the source is 512-bit wide.
1732 // 128-bit was made Legal under AVX1.
1733 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1734 MVT::v8f32, MVT::v4f64 })
1735 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1736
1737 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1738 MVT::v16f32, MVT::v8f64 }) {
1739 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1740 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1741 setOperationAction(ISD::SELECT, VT, Custom);
1742 setOperationAction(ISD::VSELECT, VT, Custom);
1743 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1744 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1745 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1746 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1747 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1748 }
1749
1750 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1751 setOperationAction(ISD::MLOAD, VT, Legal);
1752 setOperationAction(ISD::MSTORE, VT, Legal);
1753 setOperationAction(ISD::MGATHER, VT, Custom);
1754 setOperationAction(ISD::MSCATTER, VT, Custom);
1755 }
1756 if (HasBWI) {
1757 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1758 setOperationAction(ISD::MLOAD, VT, Legal);
1759 setOperationAction(ISD::MSTORE, VT, Legal);
1760 }
1761 } else {
1762 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1763 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1764 }
1765
1766 if (Subtarget.hasVBMI2()) {
1767 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1768 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1769 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1770 setOperationAction(ISD::FSHL, VT, Custom);
1771 setOperationAction(ISD::FSHR, VT, Custom);
1772 }
1773
1774 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1775 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1776 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1777 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1778 }
1779 }// useAVX512Regs
1780
1781 // This block controls legalization for operations that don't have
1782 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1783 // narrower widths.
1784 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1785 // These operations are handled on non-VLX by artificially widening in
1786 // isel patterns.
1787
1788 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1789 Subtarget.hasVLX() ? Legal : Custom);
1790 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1791 Subtarget.hasVLX() ? Legal : Custom);
1792 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1793 Subtarget.hasVLX() ? Legal : Custom);
1794 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1795 Subtarget.hasVLX() ? Legal : Custom);
1796 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1797 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1798 Subtarget.hasVLX() ? Legal : Custom);
1799 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1800 Subtarget.hasVLX() ? Legal : Custom);
1801 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1802 Subtarget.hasVLX() ? Legal : Custom);
1803 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1804 Subtarget.hasVLX() ? Legal : Custom);
1805
1806 if (Subtarget.hasDQI()) {
1807 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1808 // v2f32 UINT_TO_FP is already custom under SSE2.
1809 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1811, __extension__
__PRETTY_FUNCTION__))
1810 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1811, __extension__
__PRETTY_FUNCTION__))
1811 "Unexpected operation action!")(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 1811, __extension__
__PRETTY_FUNCTION__))
;
1812 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1813 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1814 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1815 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1816 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1817 }
1818
1819 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1820 setOperationAction(ISD::SMAX, VT, Legal);
1821 setOperationAction(ISD::UMAX, VT, Legal);
1822 setOperationAction(ISD::SMIN, VT, Legal);
1823 setOperationAction(ISD::UMIN, VT, Legal);
1824 setOperationAction(ISD::ABS, VT, Legal);
1825 }
1826
1827 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1828 setOperationAction(ISD::ROTL, VT, Custom);
1829 setOperationAction(ISD::ROTR, VT, Custom);
1830 }
1831
1832 // Custom legalize 2x32 to get a little better code.
1833 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1834 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1835
1836 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1837 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1838 setOperationAction(ISD::MSCATTER, VT, Custom);
1839
1840 if (Subtarget.hasDQI()) {
1841 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1842 setOperationAction(ISD::SINT_TO_FP, VT,
1843 Subtarget.hasVLX() ? Legal : Custom);
1844 setOperationAction(ISD::UINT_TO_FP, VT,
1845 Subtarget.hasVLX() ? Legal : Custom);
1846 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1847 Subtarget.hasVLX() ? Legal : Custom);
1848 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1849 Subtarget.hasVLX() ? Legal : Custom);
1850 setOperationAction(ISD::FP_TO_SINT, VT,
1851 Subtarget.hasVLX() ? Legal : Custom);
1852 setOperationAction(ISD::FP_TO_UINT, VT,
1853 Subtarget.hasVLX() ? Legal : Custom);
1854 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1855 Subtarget.hasVLX() ? Legal : Custom);
1856 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1857 Subtarget.hasVLX() ? Legal : Custom);
1858 setOperationAction(ISD::MUL, VT, Legal);
1859 }
1860 }
1861
1862 if (Subtarget.hasCDI()) {
1863 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1864 setOperationAction(ISD::CTLZ, VT, Legal);
1865 }
1866 } // Subtarget.hasCDI()
1867
1868 if (Subtarget.hasVPOPCNTDQ()) {
1869 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1870 setOperationAction(ISD::CTPOP, VT, Legal);
1871 }
1872 }
1873
1874 // This block controls legalization of v32i1/v64i1 which are available with
1875 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1876 // useBWIRegs.
1877 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1878 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1879 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1880
1881 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1882 setOperationAction(ISD::VSELECT, VT, Expand);
1883 setOperationAction(ISD::TRUNCATE, VT, Custom);
1884 setOperationAction(ISD::SETCC, VT, Custom);
1885 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1886 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1887 setOperationAction(ISD::SELECT, VT, Custom);
1888 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1889 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1890 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1891 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1892 }
1893
1894 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1895 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1896
1897 // Extends from v32i1 masks to 256-bit vectors.
1898 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1899 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1900 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1901
1902 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1903 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1904 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1905 }
1906
1907 // These operations are handled on non-VLX by artificially widening in
1908 // isel patterns.
1909 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1910
1911 if (Subtarget.hasBITALG()) {
1912 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1913 setOperationAction(ISD::CTPOP, VT, Legal);
1914 }
1915 }
1916
1917 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
1918 auto setGroup = [&] (MVT VT) {
1919 setOperationAction(ISD::FADD, VT, Legal);
1920 setOperationAction(ISD::STRICT_FADD, VT, Legal);
1921 setOperationAction(ISD::FSUB, VT, Legal);
1922 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
1923 setOperationAction(ISD::FMUL, VT, Legal);
1924 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
1925 setOperationAction(ISD::FDIV, VT, Legal);
1926 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
1927 setOperationAction(ISD::FSQRT, VT, Legal);
1928 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
1929
1930 setOperationAction(ISD::FFLOOR, VT, Legal);
1931 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1932 setOperationAction(ISD::FCEIL, VT, Legal);
1933 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1934 setOperationAction(ISD::FTRUNC, VT, Legal);
1935 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1936 setOperationAction(ISD::FRINT, VT, Legal);
1937 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1938 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1939 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1940
1941 setOperationAction(ISD::LOAD, VT, Legal);
1942 setOperationAction(ISD::STORE, VT, Legal);
1943
1944 setOperationAction(ISD::FMA, VT, Legal);
1945 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1946 setOperationAction(ISD::VSELECT, VT, Legal);
1947 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1948 setOperationAction(ISD::SELECT, VT, Custom);
1949
1950 setOperationAction(ISD::FNEG, VT, Custom);
1951 setOperationAction(ISD::FABS, VT, Custom);
1952 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1953 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1954 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1955 };
1956
1957 // AVX512_FP16 scalar operations
1958 setGroup(MVT::f16);
1959 addRegisterClass(MVT::f16, &X86::FR16XRegClass);
1960 setOperationAction(ISD::FREM, MVT::f16, Promote);
1961 setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
1962 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
1963 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
1964 setOperationAction(ISD::SETCC, MVT::f16, Custom);
1965 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
1966 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
1967 setOperationAction(ISD::FROUND, MVT::f16, Custom);
1968 setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
1969 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
1970 setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
1971 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
1972 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
1973 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
1974 if (isTypeLegal(MVT::f80)) {
1975 setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
1976 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
1977 }
1978
1979 setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
1980 setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
1981
1982 if (Subtarget.useAVX512Regs()) {
1983 setGroup(MVT::v32f16);
1984 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1985 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
1986 setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
1987 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
1988 setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
1989 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
1990 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
1991 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
1992 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
1993
1994 setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
1995 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
1996 setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
1997 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
1998 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
1999 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
2000 MVT::v32i16);
2001 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
2002 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
2003 MVT::v32i16);
2004 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
2005 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
2006 MVT::v32i16);
2007 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
2008 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
2009 MVT::v32i16);
2010
2011 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
2012 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
2013 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
2014
2015 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
2016 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2017
2018 setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
2019 setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
2020 }
2021
2022 if (Subtarget.hasVLX()) {
2023 addRegisterClass(MVT::v8f16, &X86::VR128XRegClass);
2024 addRegisterClass(MVT::v16f16, &X86::VR256XRegClass);
2025 setGroup(MVT::v8f16);
2026 setGroup(MVT::v16f16);
2027
2028 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
2029 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
2030 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
2031 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
2032 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
2033 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
2034 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
2035 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
2036 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
2037 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
2038
2039 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
2040 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
2041 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
2042 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
2043 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
2044 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
2045
2046 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2047 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
2048 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
2049
2050 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
2051 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
2052 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
2053
2054 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2055 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2056 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2057 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2058
2059 // Need to custom widen these to prevent scalarization.
2060 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2061 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2062 }
2063
2064 // Support fp16 0 immediate
2065 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
2066 }
2067
2068 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2069 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2070 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2071 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2072 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2073 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2074
2075 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2076 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2077 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2078 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2079 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2080
2081 if (Subtarget.hasBWI()) {
2082 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2083 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2084 }
2085
2086 if (Subtarget.hasFP16()) {
2087 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2088 setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
2089 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
2090 setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
2091 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
2092 setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
2093 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
2094 setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
2095 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
2096 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2097 setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
2098 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
2099 setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
2100 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
2101 setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
2102 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
2103 setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
2104 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
2105 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2106 setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
2107 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
2108 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
2109 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
2110 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2111 setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
2112 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
2113 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
2114 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
2115 }
2116
2117 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
2118 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
2119 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
2120 }
2121
2122 if (Subtarget.hasAMXTILE()) {
2123 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2124 }
2125
2126 // We want to custom lower some of our intrinsics.
2127 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
2128 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
2129 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
2130 if (!Subtarget.is64Bit()) {
2131 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
2132 }
2133
2134 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2135 // handle type legalization for these operations here.
2136 //
2137 // FIXME: We really should do custom legalization for addition and
2138 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2139 // than generic legalization for 64-bit multiplication-with-overflow, though.
2140 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2141 if (VT == MVT::i64 && !Subtarget.is64Bit())
2142 continue;
2143 // Add/Sub/Mul with overflow operations are custom lowered.
2144 setOperationAction(ISD::SADDO, VT, Custom);
2145 setOperationAction(ISD::UADDO, VT, Custom);
2146 setOperationAction(ISD::SSUBO, VT, Custom);
2147 setOperationAction(ISD::USUBO, VT, Custom);
2148 setOperationAction(ISD::SMULO, VT, Custom);
2149 setOperationAction(ISD::UMULO, VT, Custom);
2150
2151 // Support carry in as value rather than glue.
2152 setOperationAction(ISD::ADDCARRY, VT, Custom);
2153 setOperationAction(ISD::SUBCARRY, VT, Custom);
2154 setOperationAction(ISD::SETCCCARRY, VT, Custom);
2155 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
2156 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
2157 }
2158
2159 if (!Subtarget.is64Bit()) {
2160 // These libcalls are not available in 32-bit.
2161 setLibcallName(RTLIB::SHL_I128, nullptr);
2162 setLibcallName(RTLIB::SRL_I128, nullptr);
2163 setLibcallName(RTLIB::SRA_I128, nullptr);
2164 setLibcallName(RTLIB::MUL_I128, nullptr);
2165 // The MULO libcall is not part of libgcc, only compiler-rt.
2166 setLibcallName(RTLIB::MULO_I64, nullptr);
2167 }
2168 // The MULO libcall is not part of libgcc, only compiler-rt.
2169 setLibcallName(RTLIB::MULO_I128, nullptr);
2170
2171 // Combine sin / cos into _sincos_stret if it is available.
2172 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2173 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2174 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
2175 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
2176 }
2177
2178 if (Subtarget.isTargetWin64()) {
2179 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2180 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2181 setOperationAction(ISD::SREM, MVT::i128, Custom);
2182 setOperationAction(ISD::UREM, MVT::i128, Custom);
2183 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
2184 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
2185 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
2186 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
2187 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
2188 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
2189 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
2190 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
2191 }
2192
2193 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2194 // is. We should promote the value to 64-bits to solve this.
2195 // This is what the CRT headers do - `fmodf` is an inline header
2196 // function casting to f64 and calling `fmod`.
2197 if (Subtarget.is32Bit() &&
2198 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2199 for (ISD::NodeType Op :
2200 {ISD::FCEIL, ISD::STRICT_FCEIL,
2201 ISD::FCOS, ISD::STRICT_FCOS,
2202 ISD::FEXP, ISD::STRICT_FEXP,
2203 ISD::FFLOOR, ISD::STRICT_FFLOOR,
2204 ISD::FREM, ISD::STRICT_FREM,
2205 ISD::FLOG, ISD::STRICT_FLOG,
2206 ISD::FLOG10, ISD::STRICT_FLOG10,
2207 ISD::FPOW, ISD::STRICT_FPOW,
2208 ISD::FSIN, ISD::STRICT_FSIN})
2209 if (isOperationExpand(Op, MVT::f32))
2210 setOperationAction(Op, MVT::f32, Promote);
2211
2212 // We have target-specific dag combine patterns for the following nodes:
2213 setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
2214 ISD::SCALAR_TO_VECTOR,
2215 ISD::INSERT_VECTOR_ELT,
2216 ISD::EXTRACT_VECTOR_ELT,
2217 ISD::CONCAT_VECTORS,
2218 ISD::INSERT_SUBVECTOR,
2219 ISD::EXTRACT_SUBVECTOR,
2220 ISD::BITCAST,
2221 ISD::VSELECT,
2222 ISD::SELECT,
2223 ISD::SHL,
2224 ISD::SRA,
2225 ISD::SRL,
2226 ISD::OR,
2227 ISD::AND,
2228 ISD::ADD,
2229 ISD::FADD,
2230 ISD::FSUB,
2231 ISD::FNEG,
2232 ISD::FMA,
2233 ISD::STRICT_FMA,
2234 ISD::FMINNUM,
2235 ISD::FMAXNUM,
2236 ISD::SUB,
2237 ISD::LOAD,
2238 ISD::MLOAD,
2239 ISD::STORE,
2240 ISD::MSTORE,
2241 ISD::TRUNCATE,
2242 ISD::ZERO_EXTEND,
2243 ISD::ANY_EXTEND,
2244 ISD::SIGN_EXTEND,
2245 ISD::SIGN_EXTEND_INREG,
2246 ISD::ANY_EXTEND_VECTOR_INREG,
2247 ISD::SIGN_EXTEND_VECTOR_INREG,
2248 ISD::ZERO_EXTEND_VECTOR_INREG,
2249 ISD::SINT_TO_FP,
2250 ISD::UINT_TO_FP,
2251 ISD::STRICT_SINT_TO_FP,
2252 ISD::STRICT_UINT_TO_FP,
2253 ISD::SETCC,
2254 ISD::MUL,
2255 ISD::XOR,
2256 ISD::MSCATTER,
2257 ISD::MGATHER,
2258 ISD::FP16_TO_FP,
2259 ISD::FP_EXTEND,
2260 ISD::STRICT_FP_EXTEND,
2261 ISD::FP_ROUND});
2262
2263 computeRegisterProperties(Subtarget.getRegisterInfo());
2264
2265 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2266 MaxStoresPerMemsetOptSize = 8;
2267 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2268 MaxStoresPerMemcpyOptSize = 4;
2269 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2270 MaxStoresPerMemmoveOptSize = 4;
2271
2272 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2273 // that needs to be benchmarked and balanced with the potential use of vector
2274 // load/store types (PR33329, PR33914).
2275 MaxLoadsPerMemcmp = 2;
2276 MaxLoadsPerMemcmpOptSize = 2;
2277
2278 // Default loop alignment, which can be overridden by -align-loops.
2279 setPrefLoopAlignment(Align(16));
2280
2281 // An out-of-order CPU can speculatively execute past a predictable branch,
2282 // but a conditional move could be stalled by an expensive earlier operation.
2283 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2284 EnableExtLdPromotion = true;
2285 setPrefFunctionAlignment(Align(16));
2286
2287 verifyIntrinsicTables();
2288
2289 // Default to having -disable-strictnode-mutation on
2290 IsStrictFPEnabled = true;
2291}
2292
2293// This has so far only been implemented for 64-bit MachO.
2294bool X86TargetLowering::useLoadStackGuardNode() const {
2295 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2296}
2297
2298bool X86TargetLowering::useStackGuardXorFP() const {
2299 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2300 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2301}
2302
2303SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2304 const SDLoc &DL) const {
2305 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2306 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2307 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2308 return SDValue(Node, 0);
2309}
2310
2311TargetLoweringBase::LegalizeTypeAction
2312X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2313 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2314 !Subtarget.hasBWI())
2315 return TypeSplitVector;
2316
2317 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2318 VT.getVectorElementType() != MVT::i1)
2319 return TypeWidenVector;
2320
2321 return TargetLoweringBase::getPreferredVectorAction(VT);
2322}
2323
2324static std::pair<MVT, unsigned>
2325handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2326 const X86Subtarget &Subtarget) {
2327 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2328 // convention is one that uses k registers.
2329 if (NumElts == 2)
2330 return {MVT::v2i64, 1};
2331 if (NumElts == 4)
2332 return {MVT::v4i32, 1};
2333 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2334 CC != CallingConv::Intel_OCL_BI)
2335 return {MVT::v8i16, 1};
2336 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2337 CC != CallingConv::Intel_OCL_BI)
2338 return {MVT::v16i8, 1};
2339 // v32i1 passes in ymm unless we have BWI and the calling convention is
2340 // regcall.
2341 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2342 return {MVT::v32i8, 1};
2343 // Split v64i1 vectors if we don't have v64i8 available.
2344 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2345 if (Subtarget.useAVX512Regs())
2346 return {MVT::v64i8, 1};
2347 return {MVT::v32i8, 2};
2348 }
2349
2350 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2351 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2352 NumElts > 64)
2353 return {MVT::i8, NumElts};
2354
2355 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2356}
2357
2358MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2359 CallingConv::ID CC,
2360 EVT VT) const {
2361 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2362 Subtarget.hasAVX512()) {
2363 unsigned NumElts = VT.getVectorNumElements();
2364
2365 MVT RegisterVT;
2366 unsigned NumRegisters;
2367 std::tie(RegisterVT, NumRegisters) =
2368 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2369 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2370 return RegisterVT;
2371 }
2372
2373 // v3f16 will be widen to v4f16. But we don't assign register class for v4f16.
2374 // So its default register type is f16. We override the type to v8f16 here.
2375 if (VT == MVT::v3f16 && Subtarget.hasFP16())
2376 return MVT::v8f16;
2377
2378 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
2379 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
2380 !Subtarget.hasX87())
2381 return MVT::i32;
2382
2383 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2384}
2385
2386unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2387 CallingConv::ID CC,
2388 EVT VT) const {
2389 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2390 Subtarget.hasAVX512()) {
2391 unsigned NumElts = VT.getVectorNumElements();
2392
2393 MVT RegisterVT;
2394 unsigned NumRegisters;
2395 std::tie(RegisterVT, NumRegisters) =
2396 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2397 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2398 return NumRegisters;
2399 }
2400
2401 // v3f16 will be widen to v4f16. But we don't assign register class for v4f16.
2402 // So its default register number is 3. We override the number to 1 here.
2403 if (VT == MVT::v3f16 && Subtarget.hasFP16())
2404 return 1;
2405
2406 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
2407 // x87 is disabled.
2408 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
2409 if (VT == MVT::f64)
2410 return 2;
2411 if (VT == MVT::f80)
2412 return 3;
2413 }
2414
2415 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2416}
2417
2418unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2419 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2420 unsigned &NumIntermediates, MVT &RegisterVT) const {
2421 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2422 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2423 Subtarget.hasAVX512() &&
2424 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2425 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2426 VT.getVectorNumElements() > 64)) {
2427 RegisterVT = MVT::i8;
2428 IntermediateVT = MVT::i1;
2429 NumIntermediates = VT.getVectorNumElements();
2430 return NumIntermediates;
2431 }
2432
2433 // Split v64i1 vectors if we don't have v64i8 available.
2434 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2435 CC != CallingConv::X86_RegCall) {
2436 RegisterVT = MVT::v32i8;
2437 IntermediateVT = MVT::v32i1;
2438 NumIntermediates = 2;
2439 return 2;
2440 }
2441
2442 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2443 NumIntermediates, RegisterVT);
2444}
2445
2446EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2447 LLVMContext& Context,
2448 EVT VT) const {
2449 if (!VT.isVector())
2450 return MVT::i8;
2451
2452 if (Subtarget.hasAVX512()) {
2453 // Figure out what this type will be legalized to.
2454 EVT LegalVT = VT;
2455 while (getTypeAction(Context, LegalVT) != TypeLegal)
2456 LegalVT = getTypeToTransformTo(Context, LegalVT);
2457
2458 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2459 if (LegalVT.getSimpleVT().is512BitVector())
2460 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2461
2462 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2463 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2464 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2465 // vXi16/vXi8.
2466 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2467 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2468 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2469 }
2470 }
2471
2472 return VT.changeVectorElementTypeToInteger();
2473}
2474
2475/// Helper for getByValTypeAlignment to determine
2476/// the desired ByVal argument alignment.
2477static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2478 if (MaxAlign == 16)
2479 return;
2480 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2481 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2482 MaxAlign = Align(16);
2483 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2484 Align EltAlign;
2485 getMaxByValAlign(ATy->getElementType(), EltAlign);
2486 if (EltAlign > MaxAlign)
2487 MaxAlign = EltAlign;
2488 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2489 for (auto *EltTy : STy->elements()) {
2490 Align EltAlign;
2491 getMaxByValAlign(EltTy, EltAlign);
2492 if (EltAlign > MaxAlign)
2493 MaxAlign = EltAlign;
2494 if (MaxAlign == 16)
2495 break;
2496 }
2497 }
2498}
2499
2500/// Return the desired alignment for ByVal aggregate
2501/// function arguments in the caller parameter area. For X86, aggregates
2502/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2503/// are at 4-byte boundaries.
2504uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
2505 const DataLayout &DL) const {
2506 if (Subtarget.is64Bit()) {
2507 // Max of 8 and alignment of type.
2508 Align TyAlign = DL.getABITypeAlign(Ty);
2509 if (TyAlign > 8)
2510 return TyAlign.value();
2511 return 8;
2512 }
2513
2514 Align Alignment(4);
2515 if (Subtarget.hasSSE1())
2516 getMaxByValAlign(Ty, Alignment);
2517 return Alignment.value();
2518}
2519
2520/// It returns EVT::Other if the type should be determined using generic
2521/// target-independent logic.
2522/// For vector ops we check that the overall size isn't larger than our
2523/// preferred vector width.
2524EVT X86TargetLowering::getOptimalMemOpType(
2525 const MemOp &Op, const AttributeList &FuncAttributes) const {
2526 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
2527 if (Op.size() >= 16 &&
2528 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2529 // FIXME: Check if unaligned 64-byte accesses are slow.
2530 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2531 (Subtarget.getPreferVectorWidth() >= 512)) {
2532 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2533 }
2534 // FIXME: Check if unaligned 32-byte accesses are slow.
2535 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2536 (Subtarget.getPreferVectorWidth() >= 256)) {
2537 // Although this isn't a well-supported type for AVX1, we'll let
2538 // legalization and shuffle lowering produce the optimal codegen. If we
2539 // choose an optimal type with a vector element larger than a byte,
2540 // getMemsetStores() may create an intermediate splat (using an integer
2541 // multiply) before we splat as a vector.
2542 return MVT::v32i8;
2543 }
2544 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2545 return MVT::v16i8;
2546 // TODO: Can SSE1 handle a byte vector?
2547 // If we have SSE1 registers we should be able to use them.
2548 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2549 (Subtarget.getPreferVectorWidth() >= 128))
2550 return MVT::v4f32;
2551 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2552 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2553 // Do not use f64 to lower memcpy if source is string constant. It's
2554 // better to use i32 to avoid the loads.
2555 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2556 // The gymnastics of splatting a byte value into an XMM register and then
2557 // only using 8-byte stores (because this is a CPU with slow unaligned
2558 // 16-byte accesses) makes that a loser.
2559 return MVT::f64;
2560 }
2561 }
2562 // This is a compromise. If we reach here, unaligned accesses may be slow on
2563 // this target. However, creating smaller, aligned accesses could be even
2564 // slower and would certainly be a lot more code.
2565 if (Subtarget.is64Bit() && Op.size() >= 8)
2566 return MVT::i64;
2567 return MVT::i32;
2568}
2569
2570bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2571 if (VT == MVT::f32)
2572 return Subtarget.hasSSE1();
2573 if (VT == MVT::f64)
2574 return Subtarget.hasSSE2();
2575 return true;
2576}
2577
2578bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2579 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
2580 bool *Fast) const {
2581 if (Fast) {
2582 switch (VT.getSizeInBits()) {
2583 default:
2584 // 8-byte and under are always assumed to be fast.
2585 *Fast = true;
2586 break;
2587 case 128:
2588 *Fast = !Subtarget.isUnalignedMem16Slow();
2589 break;
2590 case 256:
2591 *Fast = !Subtarget.isUnalignedMem32Slow();
2592 break;
2593 // TODO: What about AVX-512 (512-bit) accesses?
2594 }
2595 }
2596 // NonTemporal vector memory ops must be aligned.
2597 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2598 // NT loads can only be vector aligned, so if its less aligned than the
2599 // minimum vector size (which we can split the vector down to), we might as
2600 // well use a regular unaligned vector load.
2601 // We don't have any NT loads pre-SSE41.
2602 if (!!(Flags & MachineMemOperand::MOLoad))
2603 return (Alignment < 16 || !Subtarget.hasSSE41());
2604 return false;
2605 }
2606 // Misaligned accesses of any size are always allowed.
2607 return true;
2608}
2609
2610/// Return the entry encoding for a jump table in the
2611/// current function. The returned value is a member of the
2612/// MachineJumpTableInfo::JTEntryKind enum.
2613unsigned X86TargetLowering::getJumpTableEncoding() const {
2614 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2615 // symbol.
2616 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2617 return MachineJumpTableInfo::EK_Custom32;
2618
2619 // Otherwise, use the normal jump table encoding heuristics.
2620 return TargetLowering::getJumpTableEncoding();
2621}
2622
2623bool X86TargetLowering::useSoftFloat() const {
2624 return Subtarget.useSoftFloat();
2625}
2626
2627void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2628 ArgListTy &Args) const {
2629
2630 // Only relabel X86-32 for C / Stdcall CCs.
2631 if (Subtarget.is64Bit())
2632 return;
2633 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2634 return;
2635 unsigned ParamRegs = 0;
2636 if (auto *M = MF->getFunction().getParent())
2637 ParamRegs = M->getNumberRegisterParameters();
2638
2639 // Mark the first N int arguments as having reg
2640 for (auto &Arg : Args) {
2641 Type *T = Arg.Ty;
2642 if (T->isIntOrPtrTy())
2643 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2644 unsigned numRegs = 1;
2645 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2646 numRegs = 2;
2647 if (ParamRegs < numRegs)
2648 return;
2649 ParamRegs -= numRegs;
2650 Arg.IsInReg = true;
2651 }
2652 }
2653}
2654
2655const MCExpr *
2656X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2657 const MachineBasicBlock *MBB,
2658 unsigned uid,MCContext &Ctx) const{
2659 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())(static_cast <bool> (isPositionIndependent() &&
Subtarget.isPICStyleGOT()) ? void (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2659, __extension__
__PRETTY_FUNCTION__))
;
2660 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2661 // entries.
2662 return MCSymbolRefExpr::create(MBB->getSymbol(),
2663 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2664}
2665
2666/// Returns relocation base for the given PIC jumptable.
2667SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2668 SelectionDAG &DAG) const {
2669 if (!Subtarget.is64Bit())
2670 // This doesn't have SDLoc associated with it, but is not really the
2671 // same as a Register.
2672 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2673 getPointerTy(DAG.getDataLayout()));
2674 return Table;
2675}
2676
2677/// This returns the relocation base for the given PIC jumptable,
2678/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2679const MCExpr *X86TargetLowering::
2680getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2681 MCContext &Ctx) const {
2682 // X86-64 uses RIP relative addressing based on the jump table label.
2683 if (Subtarget.isPICStyleRIPRel())
2684 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2685
2686 // Otherwise, the reference is relative to the PIC base.
2687 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2688}
2689
2690std::pair<const TargetRegisterClass *, uint8_t>
2691X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2692 MVT VT) const {
2693 const TargetRegisterClass *RRC = nullptr;
2694 uint8_t Cost = 1;
2695 switch (VT.SimpleTy) {
2696 default:
2697 return TargetLowering::findRepresentativeClass(TRI, VT);
2698 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2699 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2700 break;
2701 case MVT::x86mmx:
2702 RRC = &X86::VR64RegClass;
2703 break;
2704 case MVT::f32: case MVT::f64:
2705 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2706 case MVT::v4f32: case MVT::v2f64:
2707 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2708 case MVT::v8f32: case MVT::v4f64:
2709 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2710 case MVT::v16f32: case MVT::v8f64:
2711 RRC = &X86::VR128XRegClass;
2712 break;
2713 }
2714 return std::make_pair(RRC, Cost);
2715}
2716
2717unsigned X86TargetLowering::getAddressSpace() const {
2718 if (Subtarget.is64Bit())
2719 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2720 return 256;
2721}
2722
2723static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2724 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2725 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2726}
2727
2728static Constant* SegmentOffset(IRBuilderBase &IRB,
2729 int Offset, unsigned AddressSpace) {
2730 return ConstantExpr::getIntToPtr(
2731 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2732 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2733}
2734
2735Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
2736 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2737 // tcbhead_t; use it instead of the usual global variable (see
2738 // sysdeps/{i386,x86_64}/nptl/tls.h)
2739 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2740 if (Subtarget.isTargetFuchsia()) {
2741 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2742 return SegmentOffset(IRB, 0x10, getAddressSpace());
2743 } else {
2744 unsigned AddressSpace = getAddressSpace();
2745 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
2746 // Specially, some users may customize the base reg and offset.
2747 int Offset = M->getStackProtectorGuardOffset();
2748 // If we don't set -stack-protector-guard-offset value:
2749 // %fs:0x28, unless we're using a Kernel code model, in which case
2750 // it's %gs:0x28. gs:0x14 on i386.
2751 if (Offset == INT_MAX2147483647)
2752 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2753
2754 StringRef GuardReg = M->getStackProtectorGuardReg();
2755 if (GuardReg == "fs")
2756 AddressSpace = X86AS::FS;
2757 else if (GuardReg == "gs")
2758 AddressSpace = X86AS::GS;
2759 return SegmentOffset(IRB, Offset, AddressSpace);
2760 }
2761 }
2762 return TargetLowering::getIRStackGuard(IRB);
2763}
2764
2765void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2766 // MSVC CRT provides functionalities for stack protection.
2767 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2768 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2769 // MSVC CRT has a global variable holding security cookie.
2770 M.getOrInsertGlobal("__security_cookie",
2771 Type::getInt8PtrTy(M.getContext()));
2772
2773 // MSVC CRT has a function to validate security cookie.
2774 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2775 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2776 Type::getInt8PtrTy(M.getContext()));
2777 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2778 F->setCallingConv(CallingConv::X86_FastCall);
2779 F->addParamAttr(0, Attribute::AttrKind::InReg);
2780 }
2781 return;
2782 }
2783
2784 StringRef GuardMode = M.getStackProtectorGuard();
2785
2786 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2787 if ((GuardMode == "tls" || GuardMode.empty()) &&
2788 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2789 return;
2790 TargetLowering::insertSSPDeclarations(M);
2791}
2792
2793Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2794 // MSVC CRT has a global variable holding security cookie.
2795 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2796 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2797 return M.getGlobalVariable("__security_cookie");
2798 }
2799 return TargetLowering::getSDagStackGuard(M);
2800}
2801
2802Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2803 // MSVC CRT has a function to validate security cookie.
2804 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2805 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2806 return M.getFunction("__security_check_cookie");
2807 }
2808 return TargetLowering::getSSPStackGuardCheck(M);
2809}
2810
2811Value *
2812X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
2813 if (Subtarget.getTargetTriple().isOSContiki())
2814 return getDefaultSafeStackPointerLocation(IRB, false);
2815
2816 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2817 // definition of TLS_SLOT_SAFESTACK in
2818 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2819 if (Subtarget.isTargetAndroid()) {
2820 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2821 // %gs:0x24 on i386
2822 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2823 return SegmentOffset(IRB, Offset, getAddressSpace());
2824 }
2825
2826 // Fuchsia is similar.
2827 if (Subtarget.isTargetFuchsia()) {
2828 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2829 return SegmentOffset(IRB, 0x18, getAddressSpace());
2830 }
2831
2832 return TargetLowering::getSafeStackPointerLocation(IRB);
2833}
2834
2835//===----------------------------------------------------------------------===//
2836// Return Value Calling Convention Implementation
2837//===----------------------------------------------------------------------===//
2838
2839bool X86TargetLowering::CanLowerReturn(
2840 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2841 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2842 SmallVector<CCValAssign, 16> RVLocs;
2843 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2844 return CCInfo.CheckReturn(Outs, RetCC_X86);
2845}
2846
2847const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2848 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2849 return ScratchRegs;
2850}
2851
2852/// Lowers masks values (v*i1) to the local register values
2853/// \returns DAG node after lowering to register type
2854static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2855 const SDLoc &Dl, SelectionDAG &DAG) {
2856 EVT ValVT = ValArg.getValueType();
2857
2858 if (ValVT == MVT::v1i1)
2859 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2860 DAG.getIntPtrConstant(0, Dl));
2861
2862 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2863 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2864 // Two stage lowering might be required
2865 // bitcast: v8i1 -> i8 / v16i1 -> i16
2866 // anyextend: i8 -> i32 / i16 -> i32
2867 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2868 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2869 if (ValLoc == MVT::i32)
2870 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2871 return ValToCopy;
2872 }
2873
2874 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2875 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2876 // One stage lowering is required
2877 // bitcast: v32i1 -> i32 / v64i1 -> i64
2878 return DAG.getBitcast(ValLoc, ValArg);
2879 }
2880
2881 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2882}
2883
2884/// Breaks v64i1 value into two registers and adds the new node to the DAG
2885static void Passv64i1ArgInRegs(
2886 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2887 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
2888 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2889 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")(static_cast <bool> (Subtarget.hasBWI() && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2889, __extension__
__PRETTY_FUNCTION__))
;
2890 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2890, __extension__
__PRETTY_FUNCTION__))
;
2891 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")(static_cast <bool> (Arg.getValueType() == MVT::i64 &&
"Expecting 64 bit value") ? void (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2891, __extension__
__PRETTY_FUNCTION__))
;
2892 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2893, __extension__
__PRETTY_FUNCTION__))
2893 "The value should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2893, __extension__
__PRETTY_FUNCTION__))
;
2894
2895 // Before splitting the value we cast it to i64
2896 Arg = DAG.getBitcast(MVT::i64, Arg);
2897
2898 // Splitting the value into two i32 types
2899 SDValue Lo, Hi;
2900 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2901 DAG.getConstant(0, Dl, MVT::i32));
2902 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2903 DAG.getConstant(1, Dl, MVT::i32));
2904
2905 // Attach the two i32 types into corresponding registers
2906 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2907 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2908}
2909
2910SDValue
2911X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2912 bool isVarArg,
2913 const SmallVectorImpl<ISD::OutputArg> &Outs,
2914 const SmallVectorImpl<SDValue> &OutVals,
2915 const SDLoc &dl, SelectionDAG &DAG) const {
2916 MachineFunction &MF = DAG.getMachineFunction();
2917 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2918
2919 // In some cases we need to disable registers from the default CSR list.
2920 // For example, when they are used for argument passing.
2921 bool ShouldDisableCalleeSavedRegister =
2922 CallConv == CallingConv::X86_RegCall ||
2923 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2924
2925 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2926 report_fatal_error("X86 interrupts may not return any value");
2927
2928 SmallVector<CCValAssign, 16> RVLocs;
2929 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2930 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2931
2932 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
2933 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2934 ++I, ++OutsIndex) {
2935 CCValAssign &VA = RVLocs[I];
2936 assert(VA.isRegLoc() && "Can only return in registers!")(static_cast <bool> (VA.isRegLoc() && "Can only return in registers!"
) ? void (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2936, __extension__
__PRETTY_FUNCTION__))
;
2937
2938 // Add the register to the CalleeSaveDisableRegs list.
2939 if (ShouldDisableCalleeSavedRegister)
2940 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2941
2942 SDValue ValToCopy = OutVals[OutsIndex];
2943 EVT ValVT = ValToCopy.getValueType();
2944
2945 // Promote values to the appropriate types.
2946 if (VA.getLocInfo() == CCValAssign::SExt)
2947 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2948 else if (VA.getLocInfo() == CCValAssign::ZExt)
2949 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2950 else if (VA.getLocInfo() == CCValAssign::AExt) {
2951 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2952 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2953 else
2954 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2955 }
2956 else if (VA.getLocInfo() == CCValAssign::BCvt)
2957 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2958
2959 assert(VA.getLocInfo() != CCValAssign::FPExt &&(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2960, __extension__
__PRETTY_FUNCTION__))
2960 "Unexpected FP-extend for return value.")(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 2960, __extension__
__PRETTY_FUNCTION__))
;
2961
2962 // Report an error if we have attempted to return a value via an XMM
2963 // register and SSE was disabled.
2964 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
2965 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2966 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2967 } else if (!Subtarget.hasSSE2() &&
2968 X86::FR64XRegClass.contains(VA.getLocReg()) &&
2969 ValVT == MVT::f64) {
2970 // When returning a double via an XMM register, report an error if SSE2 is
2971 // not enabled.
2972 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2973 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2974 }
2975
2976 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2977 // the RET instruction and handled by the FP Stackifier.
2978 if (VA.getLocReg() == X86::FP0 ||
2979 VA.getLocReg() == X86::FP1) {
2980 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2981 // change the value to the FP stack register class.
2982 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2983 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2984 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2985 // Don't emit a copytoreg.
2986 continue;
2987 }
2988
2989 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2990 // which is returned in RAX / RDX.
2991 if (Subtarget.is64Bit()) {
2992 if (ValVT == MVT::x86mmx) {
2993 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2994 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2995 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2996 ValToCopy);
2997 // If we don't have SSE2 available, convert to v4f32 so the generated
2998 // register is legal.
2999 if (!Subtarget.hasSSE2())
3000 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
3001 }
3002 }
3003 }
3004
3005 if (VA.needsCustom()) {
3006 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3007, __extension__
__PRETTY_FUNCTION__))
3007 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3007, __extension__
__PRETTY_FUNCTION__))
;
3008
3009 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
3010 Subtarget);
3011
3012 // Add the second register to the CalleeSaveDisableRegs list.
3013 if (ShouldDisableCalleeSavedRegister)
3014 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
3015 } else {
3016 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
3017 }
3018 }
3019
3020 SDValue Flag;
3021 SmallVector<SDValue, 6> RetOps;
3022 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3023 // Operand #1 = Bytes To Pop
3024 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
3025 MVT::i32));
3026
3027 // Copy the result values into the output registers.
3028 for (auto &RetVal : RetVals) {
3029 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
3030 RetOps.push_back(RetVal.second);
3031 continue; // Don't emit a copytoreg.
3032 }
3033
3034 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
3035 Flag = Chain.getValue(1);
3036 RetOps.push_back(
3037 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
3038 }
3039
3040 // Swift calling convention does not require we copy the sret argument
3041 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
3042
3043 // All x86 ABIs require that for returning structs by value we copy
3044 // the sret argument into %rax/%eax (depending on ABI) for the return.
3045 // We saved the argument into a virtual register in the entry block,
3046 // so now we copy the value out and into %rax/%eax.
3047 //
3048 // Checking Function.hasStructRetAttr() here is insufficient because the IR
3049 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
3050 // false, then an sret argument may be implicitly inserted in the SelDAG. In
3051 // either case FuncInfo->setSRetReturnReg() will have been called.
3052 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3053 // When we have both sret and another return value, we should use the
3054 // original Chain stored in RetOps[0], instead of the current Chain updated
3055 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
3056
3057 // For the case of sret and another return value, we have
3058 // Chain_0 at the function entry
3059 // Chain_1 = getCopyToReg(Chain_0) in the above loop
3060 // If we use Chain_1 in getCopyFromReg, we will have
3061 // Val = getCopyFromReg(Chain_1)
3062 // Chain_2 = getCopyToReg(Chain_1, Val) from below
3063
3064 // getCopyToReg(Chain_0) will be glued together with
3065 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
3066 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
3067 // Data dependency from Unit B to Unit A due to usage of Val in
3068 // getCopyToReg(Chain_1, Val)
3069 // Chain dependency from Unit A to Unit B
3070
3071 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
3072 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
3073 getPointerTy(MF.getDataLayout()));
3074
3075 Register RetValReg
3076 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
3077 X86::RAX : X86::EAX;
3078 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
3079 Flag = Chain.getValue(1);
3080
3081 // RAX/EAX now acts like a return value.
3082 RetOps.push_back(
3083 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
3084
3085 // Add the returned register to the CalleeSaveDisableRegs list.
3086 if (ShouldDisableCalleeSavedRegister)
3087 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
3088 }
3089
3090 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3091 const MCPhysReg *I =
3092 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3093 if (I) {
3094 for (; *I; ++I) {
3095 if (X86::GR64RegClass.contains(*I))
3096 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3097 else
3098 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3098)
;
3099 }
3100 }
3101
3102 RetOps[0] = Chain; // Update chain.
3103
3104 // Add the flag if we have it.
3105 if (Flag.getNode())
3106 RetOps.push_back(Flag);
3107
3108 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
3109 if (CallConv == CallingConv::X86_INTR)
3110 opcode = X86ISD::IRET;
3111 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
3112}
3113
3114bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3115 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
3116 return false;
3117
3118 SDValue TCChain = Chain;
3119 SDNode *Copy = *N->use_begin();
3120 if (Copy->getOpcode() == ISD::CopyToReg) {
3121 // If the copy has a glue operand, we conservatively assume it isn't safe to
3122 // perform a tail call.
3123 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3124 return false;
3125 TCChain = Copy->getOperand(0);
3126 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
3127 return false;
3128
3129 bool HasRet = false;
3130 for (const SDNode *U : Copy->uses()) {
3131 if (U->getOpcode() != X86ISD::RET_FLAG)
3132 return false;
3133 // If we are returning more than one value, we can definitely
3134 // not make a tail call see PR19530
3135 if (U->getNumOperands() > 4)
3136 return false;
3137 if (U->getNumOperands() == 4 &&
3138 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
3139 return false;
3140 HasRet = true;
3141 }
3142
3143 if (!HasRet)
3144 return false;
3145
3146 Chain = TCChain;
3147 return true;
3148}
3149
3150EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
3151 ISD::NodeType ExtendKind) const {
3152 MVT ReturnMVT = MVT::i32;
3153
3154 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
3155 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
3156 // The ABI does not require i1, i8 or i16 to be extended.
3157 //
3158 // On Darwin, there is code in the wild relying on Clang's old behaviour of
3159 // always extending i8/i16 return values, so keep doing that for now.
3160 // (PR26665).
3161 ReturnMVT = MVT::i8;
3162 }
3163
3164 EVT MinVT = getRegisterType(Context, ReturnMVT);
3165 return VT.bitsLT(MinVT) ? MinVT : VT;
3166}
3167
3168/// Reads two 32 bit registers and creates a 64 bit mask value.
3169/// \param VA The current 32 bit value that need to be assigned.
3170/// \param NextVA The next 32 bit value that need to be assigned.
3171/// \param Root The parent DAG node.
3172/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
3173/// glue purposes. In the case the DAG is already using
3174/// physical register instead of virtual, we should glue
3175/// our new SDValue to InFlag SDvalue.
3176/// \return a new SDvalue of size 64bit.
3177static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
3178 SDValue &Root, SelectionDAG &DAG,
3179 const SDLoc &Dl, const X86Subtarget &Subtarget,
3180 SDValue *InFlag = nullptr) {
3181 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(static_cast <bool> ((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3181, __extension__
__PRETTY_FUNCTION__))
;
3182 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3182, __extension__
__PRETTY_FUNCTION__))
;
3183 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3184, __extension__
__PRETTY_FUNCTION__))
3184 "Expecting first location of 64 bit width type")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3184, __extension__
__PRETTY_FUNCTION__))
;
3185 assert(NextVA.getValVT() == VA.getValVT() &&(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3186, __extension__
__PRETTY_FUNCTION__))
3186 "The locations should have the same type")(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3186, __extension__
__PRETTY_FUNCTION__))
;
3187 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3188, __extension__
__PRETTY_FUNCTION__))
3188 "The values should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3188, __extension__
__PRETTY_FUNCTION__))
;
3189
3190 SDValue Lo, Hi;
3191 SDValue ArgValueLo, ArgValueHi;
3192
3193 MachineFunction &MF = DAG.getMachineFunction();
3194 const TargetRegisterClass *RC = &X86::GR32RegClass;
3195
3196 // Read a 32 bit value from the registers.
3197 if (nullptr == InFlag) {
3198 // When no physical register is present,
3199 // create an intermediate virtual register.
3200 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3201 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3202 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3203 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3204 } else {
3205 // When a physical register is available read the value from it and glue
3206 // the reads together.
3207 ArgValueLo =
3208 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
3209 *InFlag = ArgValueLo.getValue(2);
3210 ArgValueHi =
3211 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
3212 *InFlag = ArgValueHi.getValue(2);
3213 }
3214
3215 // Convert the i32 type into v32i1 type.
3216 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
3217
3218 // Convert the i32 type into v32i1 type.
3219 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
3220
3221 // Concatenate the two values together.
3222 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
3223}
3224
3225/// The function will lower a register of various sizes (8/16/32/64)
3226/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
3227/// \returns a DAG node contains the operand after lowering to mask type.
3228static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
3229 const EVT &ValLoc, const SDLoc &Dl,
3230 SelectionDAG &DAG) {
3231 SDValue ValReturned = ValArg;
3232
3233 if (ValVT == MVT::v1i1)
3234 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3235
3236 if (ValVT == MVT::v64i1) {
3237 // In 32 bit machine, this case is handled by getv64i1Argument
3238 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")(static_cast <bool> (ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? void (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3238, __extension__
__PRETTY_FUNCTION__))
;
3239 // In 64 bit machine, There is no need to truncate the value only bitcast
3240 } else {
3241 MVT maskLen;
3242 switch (ValVT.getSimpleVT().SimpleTy) {
3243 case MVT::v8i1:
3244 maskLen = MVT::i8;
3245 break;
3246 case MVT::v16i1:
3247 maskLen = MVT::i16;
3248 break;
3249 case MVT::v32i1:
3250 maskLen = MVT::i32;
3251 break;
3252 default:
3253 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3253)
;
3254 }
3255
3256 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3257 }
3258 return DAG.getBitcast(ValVT, ValReturned);
3259}
3260
3261/// Lower the result values of a call into the
3262/// appropriate copies out of appropriate physical registers.
3263///
3264SDValue X86TargetLowering::LowerCallResult(
3265 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3266 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3267 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3268 uint32_t *RegMask) const {
3269
3270 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3271 // Assign locations to each value returned by this call.
3272 SmallVector<CCValAssign, 16> RVLocs;
3273 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3274 *DAG.getContext());
3275 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3276
3277 // Copy all of the result registers out of their specified physreg.
3278 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3279 ++I, ++InsIndex) {
3280 CCValAssign &VA = RVLocs[I];
3281 EVT CopyVT = VA.getLocVT();
3282
3283 // In some calling conventions we need to remove the used registers
3284 // from the register mask.
3285 if (RegMask) {
3286 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3287 SubRegs.isValid(); ++SubRegs)
3288 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3289 }
3290
3291 // Report an error if there was an attempt to return FP values via XMM
3292 // registers.
3293 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3294 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3295 if (VA.getLocReg() == X86::XMM1)
3296 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3297 else
3298 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3299 } else if (!Subtarget.hasSSE2() &&
3300 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3301 CopyVT == MVT::f64) {
3302 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3303 if (VA.getLocReg() == X86::XMM1)
3304 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3305 else
3306 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3307 }
3308
3309 // If we prefer to use the value in xmm registers, copy it out as f80 and
3310 // use a truncate to move it from fp stack reg to xmm reg.
3311 bool RoundAfterCopy = false;
3312 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3313 isScalarFPTypeInSSEReg(VA.getValVT())) {
3314 if (!Subtarget.hasX87())
3315 report_fatal_error("X87 register return with X87 disabled");
3316 CopyVT = MVT::f80;
3317 RoundAfterCopy = (CopyVT != VA.getLocVT());
3318 }
3319
3320 SDValue Val;
3321 if (VA.needsCustom()) {
3322 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3323, __extension__
__PRETTY_FUNCTION__))
3323 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3323, __extension__
__PRETTY_FUNCTION__))
;
3324 Val =
3325 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3326 } else {
3327 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3328 .getValue(1);
3329 Val = Chain.getValue(0);
3330 InFlag = Chain.getValue(2);
3331 }
3332
3333 if (RoundAfterCopy)
3334 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3335 // This truncation won't change the value.
3336 DAG.getIntPtrConstant(1, dl));
3337
3338 if (VA.isExtInLoc()) {
3339 if (VA.getValVT().isVector() &&
3340 VA.getValVT().getScalarType() == MVT::i1 &&
3341 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3342 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3343 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3344 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3345 } else
3346 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3347 }
3348
3349 if (VA.getLocInfo() == CCValAssign::BCvt)
3350 Val = DAG.getBitcast(VA.getValVT(), Val);
3351
3352 InVals.push_back(Val);
3353 }
3354
3355 return Chain;
3356}
3357
3358//===----------------------------------------------------------------------===//
3359// C & StdCall & Fast Calling Convention implementation
3360//===----------------------------------------------------------------------===//
3361// StdCall calling convention seems to be standard for many Windows' API
3362// routines and around. It differs from C calling convention just a little:
3363// callee should clean up the stack, not caller. Symbols should be also
3364// decorated in some fancy way :) It doesn't support any vector arguments.
3365// For info on fast calling convention see Fast Calling Convention (tail call)
3366// implementation LowerX86_32FastCCCallTo.
3367
3368/// Determines whether Args, either a set of outgoing arguments to a call, or a
3369/// set of incoming args of a call, contains an sret pointer that the callee
3370/// pops
3371template <typename T>
3372static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
3373 const X86Subtarget &Subtarget) {
3374 // Not C++20 (yet), so no concepts available.
3375 static_assert(std::is_same<T, ISD::OutputArg>::value ||
3376 std::is_same<T, ISD::InputArg>::value,
3377 "requires ISD::OutputArg or ISD::InputArg");
3378
3379 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
3380 // for most compilations.
3381 if (!Subtarget.is32Bit())
3382 return false;
3383
3384 if (Args.empty())
3385 return false;
3386
3387 // Most calls do not have an sret argument, check the arg next.
3388 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
3389 if (!Flags.isSRet() || Flags.isInReg())
3390 return false;
3391
3392 // The MSVCabi does not pop the sret.
3393 if (Subtarget.getTargetTriple().isOSMSVCRT())
3394 return false;
3395
3396 // MCUs don't pop the sret
3397 if (Subtarget.isTargetMCU())
3398 return false;
3399
3400 // Callee pops argument
3401 return true;
3402}
3403
3404/// Make a copy of an aggregate at address specified by "Src" to address
3405/// "Dst" with size and alignment information specified by the specific
3406/// parameter attribute. The copy will be passed as a byval function parameter.
3407static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3408 SDValue Chain, ISD::ArgFlagsTy Flags,
3409 SelectionDAG &DAG, const SDLoc &dl) {
3410 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3411
3412 return DAG.getMemcpy(
3413 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3414 /*isVolatile*/ false, /*AlwaysInline=*/true,
3415 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3416}
3417
3418/// Return true if the calling convention is one that we can guarantee TCO for.
3419static bool canGuaranteeTCO(CallingConv::ID CC) {
3420 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3421 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3422 CC == CallingConv::HHVM || CC == CallingConv::Tail ||
3423 CC == CallingConv::SwiftTail);
3424}
3425
3426/// Return true if we might ever do TCO for calls with this calling convention.
3427static bool mayTailCallThisCC(CallingConv::ID CC) {
3428 switch (CC) {
3429 // C calling conventions:
3430 case CallingConv::C:
3431 case CallingConv::Win64:
3432 case CallingConv::X86_64_SysV:
3433 // Callee pop conventions:
3434 case CallingConv::X86_ThisCall:
3435 case CallingConv::X86_StdCall:
3436 case CallingConv::X86_VectorCall:
3437 case CallingConv::X86_FastCall:
3438 // Swift:
3439 case CallingConv::Swift:
3440 return true;
3441 default:
3442 return canGuaranteeTCO(CC);
3443 }
3444}
3445
3446/// Return true if the function is being made into a tailcall target by
3447/// changing its ABI.
3448static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3449 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
3450 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
3451}
3452
3453bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3454 if (!CI->isTailCall())
3455 return false;
3456
3457 CallingConv::ID CalleeCC = CI->getCallingConv();
3458 if (!mayTailCallThisCC(CalleeCC))
3459 return false;
3460
3461 return true;
3462}
3463
3464SDValue
3465X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3466 const SmallVectorImpl<ISD::InputArg> &Ins,
3467 const SDLoc &dl, SelectionDAG &DAG,
3468 const CCValAssign &VA,
3469 MachineFrameInfo &MFI, unsigned i) const {
3470 // Create the nodes corresponding to a load from this parameter slot.
3471 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3472 bool AlwaysUseMutable = shouldGuaranteeTCO(
3473 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3474 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3475 EVT ValVT;
3476 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3477
3478 // If value is passed by pointer we have address passed instead of the value
3479 // itself. No need to extend if the mask value and location share the same
3480 // absolute size.
3481 bool ExtendedInMem =
3482 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3483 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3484
3485 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3486 ValVT = VA.getLocVT();
3487 else
3488 ValVT = VA.getValVT();
3489
3490 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3491 // changed with more analysis.
3492 // In case of tail call optimization mark all arguments mutable. Since they
3493 // could be overwritten by lowering of arguments in case of a tail call.
3494 if (Flags.isByVal()) {
3495 unsigned Bytes = Flags.getByValSize();
3496 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3497
3498 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3499 // can be improved with deeper analysis.
3500 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3501 /*isAliased=*/true);
3502 return DAG.getFrameIndex(FI, PtrVT);
3503 }
3504
3505 EVT ArgVT = Ins[i].ArgVT;
3506
3507 // If this is a vector that has been split into multiple parts, and the
3508 // scalar size of the parts don't match the vector element size, then we can't
3509 // elide the copy. The parts will have padding between them instead of being
3510 // packed like a vector.
3511 bool ScalarizedAndExtendedVector =
3512 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3513 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3514
3515 // This is an argument in memory. We might be able to perform copy elision.
3516 // If the argument is passed directly in memory without any extension, then we
3517 // can perform copy elision. Large vector types, for example, may be passed
3518 // indirectly by pointer.
3519 if (Flags.isCopyElisionCandidate() &&
3520 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3521 !ScalarizedAndExtendedVector) {
3522 SDValue PartAddr;
3523 if (Ins[i].PartOffset == 0) {
3524 // If this is a one-part value or the first part of a multi-part value,
3525 // create a stack object for the entire argument value type and return a
3526 // load from our portion of it. This assumes that if the first part of an
3527 // argument is in memory, the rest will also be in memory.
3528 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3529 /*IsImmutable=*/false);
3530 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3531 return DAG.getLoad(
3532 ValVT, dl, Chain, PartAddr,
3533 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3534 } else {
3535 // This is not the first piece of an argument in memory. See if there is
3536 // already a fixed stack object including this offset. If so, assume it
3537 // was created by the PartOffset == 0 branch above and create a load from
3538 // the appropriate offset into it.
3539 int64_t PartBegin = VA.getLocMemOffset();
3540 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3541 int FI = MFI.getObjectIndexBegin();
3542 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3543 int64_t ObjBegin = MFI.getObjectOffset(FI);
3544 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3545 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3546 break;
3547 }
3548 if (MFI.isFixedObjectIndex(FI)) {
3549 SDValue Addr =
3550 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3551 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3552 return DAG.getLoad(
3553 ValVT, dl, Chain, Addr,
3554 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3555 Ins[i].PartOffset));
3556 }
3557 }
3558 }
3559
3560 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3561 VA.getLocMemOffset(), isImmutable);
3562
3563 // Set SExt or ZExt flag.
3564 if (VA.getLocInfo() == CCValAssign::ZExt) {
3565 MFI.setObjectZExt(FI, true);
3566 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3567 MFI.setObjectSExt(FI, true);
3568 }
3569
3570 MaybeAlign Alignment;
3571 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
3572 ValVT != MVT::f80)
3573 Alignment = MaybeAlign(4);
3574 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3575 SDValue Val = DAG.getLoad(
3576 ValVT, dl, Chain, FIN,
3577 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3578 Alignment);
3579 return ExtendedInMem
3580 ? (VA.getValVT().isVector()
3581 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3582 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3583 : Val;
3584}
3585
3586// FIXME: Get this from tablegen.
3587static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3588 const X86Subtarget &Subtarget) {
3589 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3589, __extension__ __PRETTY_FUNCTION__))
;
3590
3591 if (Subtarget.isCallingConvWin64(CallConv)) {
3592 static const MCPhysReg GPR64ArgRegsWin64[] = {
3593 X86::RCX, X86::RDX, X86::R8, X86::R9
3594 };
3595 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3596 }
3597
3598 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3599 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3600 };
3601 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3602}
3603
3604// FIXME: Get this from tablegen.
3605static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3606 CallingConv::ID CallConv,
3607 const X86Subtarget &Subtarget) {
3608 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3608, __extension__ __PRETTY_FUNCTION__))
;
3609 if (Subtarget.isCallingConvWin64(CallConv)) {
3610 // The XMM registers which might contain var arg parameters are shadowed
3611 // in their paired GPR. So we only need to save the GPR to their home
3612 // slots.
3613 // TODO: __vectorcall will change this.
3614 return None;
3615 }
3616
3617 bool isSoftFloat = Subtarget.useSoftFloat();
3618 if (isSoftFloat || !Subtarget.hasSSE1())
3619 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3620 // registers.
3621 return None;
3622
3623 static const MCPhysReg XMMArgRegs64Bit[] = {
3624 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3625 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3626 };
3627 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3628}
3629
3630#ifndef NDEBUG
3631static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3632 return llvm::is_sorted(
3633 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3634 return A.getValNo() < B.getValNo();
3635 });
3636}
3637#endif
3638
3639namespace {
3640/// This is a helper class for lowering variable arguments parameters.
3641class VarArgsLoweringHelper {
3642public:
3643 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3644 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3645 CallingConv::ID CallConv, CCState &CCInfo)
3646 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3647 TheMachineFunction(DAG.getMachineFunction()),
3648 TheFunction(TheMachineFunction.getFunction()),
3649 FrameInfo(TheMachineFunction.getFrameInfo()),
3650 FrameLowering(*Subtarget.getFrameLowering()),
3651 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3652 CCInfo(CCInfo) {}
3653
3654 // Lower variable arguments parameters.
3655 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3656
3657private:
3658 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3659
3660 void forwardMustTailParameters(SDValue &Chain);
3661
3662 bool is64Bit() const { return Subtarget.is64Bit(); }
3663 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3664
3665 X86MachineFunctionInfo *FuncInfo;
3666 const SDLoc &DL;
3667 SelectionDAG &DAG;
3668 const X86Subtarget &Subtarget;
3669 MachineFunction &TheMachineFunction;
3670 const Function &TheFunction;
3671 MachineFrameInfo &FrameInfo;
3672 const TargetFrameLowering &FrameLowering;
3673 const TargetLowering &TargLowering;
3674 CallingConv::ID CallConv;
3675 CCState &CCInfo;
3676};
3677} // namespace
3678
3679void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3680 SDValue &Chain, unsigned StackSize) {
3681 // If the function takes variable number of arguments, make a frame index for
3682 // the start of the first vararg value... for expansion of llvm.va_start. We
3683 // can skip this if there are no va_start calls.
3684 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3685 CallConv != CallingConv::X86_ThisCall)) {
3686 FuncInfo->setVarArgsFrameIndex(
3687 FrameInfo.CreateFixedObject(1, StackSize, true));
3688 }
3689
3690 // 64-bit calling conventions support varargs and register parameters, so we
3691 // have to do extra work to spill them in the prologue.
3692 if (is64Bit()) {
3693 // Find the first unallocated argument registers.
3694 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3695 ArrayRef<MCPhysReg> ArgXMMs =
3696 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3697 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3698 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3699
3700 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3701, __extension__
__PRETTY_FUNCTION__))
3701 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3701, __extension__
__PRETTY_FUNCTION__))
;
3702
3703 if (isWin64()) {
3704 // Get to the caller-allocated home save location. Add 8 to account
3705 // for the return address.
3706 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3707 FuncInfo->setRegSaveFrameIndex(
3708 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3709 // Fixup to set vararg frame on shadow area (4 x i64).
3710 if (NumIntRegs < 4)
3711 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3712 } else {
3713 // For X86-64, if there are vararg parameters that are passed via
3714 // registers, then we must store them to their spots on the stack so
3715 // they may be loaded by dereferencing the result of va_next.
3716 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3717 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3718 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3719 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3720 }
3721
3722 SmallVector<SDValue, 6>
3723 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3724 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3725 // keeping live input value
3726 SDValue ALVal; // if applicable keeps SDValue for %al register
3727
3728 // Gather all the live in physical registers.
3729 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3730 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3731 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3732 }
3733 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3734 if (!AvailableXmms.empty()) {
3735 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3736 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3737 for (MCPhysReg Reg : AvailableXmms) {
3738 // FastRegisterAllocator spills virtual registers at basic
3739 // block boundary. That leads to usages of xmm registers
3740 // outside of check for %al. Pass physical registers to
3741 // VASTART_SAVE_XMM_REGS to avoid unneccessary spilling.
3742 TheMachineFunction.getRegInfo().addLiveIn(Reg);
3743 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
3744 }
3745 }
3746
3747 // Store the integer parameter registers.
3748 SmallVector<SDValue, 8> MemOps;
3749 SDValue RSFIN =
3750 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3751 TargLowering.getPointerTy(DAG.getDataLayout()));
3752 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3753 for (SDValue Val : LiveGPRs) {
3754 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3755 TargLowering.getPointerTy(DAG.getDataLayout()),
3756 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3757 SDValue Store =
3758 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3759 MachinePointerInfo::getFixedStack(
3760 DAG.getMachineFunction(),
3761 FuncInfo->getRegSaveFrameIndex(), Offset));
3762 MemOps.push_back(Store);
3763 Offset += 8;
3764 }
3765
3766 // Now store the XMM (fp + vector) parameter registers.
3767 if (!LiveXMMRegs.empty()) {
3768 SmallVector<SDValue, 12> SaveXMMOps;
3769 SaveXMMOps.push_back(Chain);
3770 SaveXMMOps.push_back(ALVal);
3771 SaveXMMOps.push_back(RSFIN);
3772 SaveXMMOps.push_back(
3773 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
3774 llvm::append_range(SaveXMMOps, LiveXMMRegs);
3775 MachineMemOperand *StoreMMO =
3776 DAG.getMachineFunction().getMachineMemOperand(
3777 MachinePointerInfo::getFixedStack(
3778 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
3779 Offset),
3780 MachineMemOperand::MOStore, 128, Align(16));
3781 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
3782 DL, DAG.getVTList(MVT::Other),
3783 SaveXMMOps, MVT::i8, StoreMMO));
3784 }
3785
3786 if (!MemOps.empty())
3787 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3788 }
3789}
3790
3791void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3792 // Find the largest legal vector type.
3793 MVT VecVT = MVT::Other;
3794 // FIXME: Only some x86_32 calling conventions support AVX512.
3795 if (Subtarget.useAVX512Regs() &&
3796 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3797 CallConv == CallingConv::Intel_OCL_BI)))
3798 VecVT = MVT::v16f32;
3799 else if (Subtarget.hasAVX())
3800 VecVT = MVT::v8f32;
3801 else if (Subtarget.hasSSE2())
3802 VecVT = MVT::v4f32;
3803
3804 // We forward some GPRs and some vector types.
3805 SmallVector<MVT, 2> RegParmTypes;
3806 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3807 RegParmTypes.push_back(IntVT);
3808 if (VecVT != MVT::Other)
3809 RegParmTypes.push_back(VecVT);
3810
3811 // Compute the set of forwarded registers. The rest are scratch.
3812 SmallVectorImpl<ForwardedRegister> &Forwards =
3813 FuncInfo->getForwardedMustTailRegParms();
3814 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3815
3816 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3817 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3818 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3819 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3820 }
3821
3822 // Copy all forwards from physical to virtual registers.
3823 for (ForwardedRegister &FR : Forwards) {
3824 // FIXME: Can we use a less constrained schedule?
3825 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3826 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
3827 TargLowering.getRegClassFor(FR.VT));
3828 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
3829 }
3830}
3831
3832void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
3833 unsigned StackSize) {
3834 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
3835 // If necessary, it would be set into the correct value later.
3836 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3837 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3838
3839 if (FrameInfo.hasVAStart())
3840 createVarArgAreaAndStoreRegisters(Chain, StackSize);
3841
3842 if (FrameInfo.hasMustTailInVarArgFunc())
3843 forwardMustTailParameters(Chain);
3844}
3845
3846SDValue X86TargetLowering::LowerFormalArguments(
3847 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3848 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3849 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3850 MachineFunction &MF = DAG.getMachineFunction();
3851 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3852
3853 const Function &F = MF.getFunction();
3854 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3855 F.getName() == "main")
3856 FuncInfo->setForceFramePointer(true);
3857
3858 MachineFrameInfo &MFI = MF.getFrameInfo();
3859 bool Is64Bit = Subtarget.is64Bit();
3860 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3861
3862 assert((static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3864, __extension__
__PRETTY_FUNCTION__))
3863 !(IsVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3864, __extension__
__PRETTY_FUNCTION__))
3864 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")(static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3864, __extension__
__PRETTY_FUNCTION__))
;
3865
3866 // Assign locations to all of the incoming arguments.
3867 SmallVector<CCValAssign, 16> ArgLocs;
3868 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3869
3870 // Allocate shadow area for Win64.
3871 if (IsWin64)
3872 CCInfo.AllocateStack(32, Align(8));
3873
3874 CCInfo.AnalyzeArguments(Ins, CC_X86);
3875
3876 // In vectorcall calling convention a second pass is required for the HVA
3877 // types.
3878 if (CallingConv::X86_VectorCall == CallConv) {
3879 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3880 }
3881
3882 // The next loop assumes that the locations are in the same order of the
3883 // input arguments.
3884 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3885, __extension__
__PRETTY_FUNCTION__))
3885 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3885, __extension__
__PRETTY_FUNCTION__))
;
3886
3887 SDValue ArgValue;
3888 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3889 ++I, ++InsIndex) {
3890 assert(InsIndex < Ins.size() && "Invalid Ins index")(static_cast <bool> (InsIndex < Ins.size() &&
"Invalid Ins index") ? void (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3890, __extension__
__PRETTY_FUNCTION__))
;
3891 CCValAssign &VA = ArgLocs[I];
3892
3893 if (VA.isRegLoc()) {
3894 EVT RegVT = VA.getLocVT();
3895 if (VA.needsCustom()) {
3896 assert((static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3898, __extension__
__PRETTY_FUNCTION__))
3897 VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3898, __extension__
__PRETTY_FUNCTION__))
3898 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 3898, __extension__
__PRETTY_FUNCTION__))
;
3899
3900 // v64i1 values, in regcall calling convention, that are
3901 // compiled to 32 bit arch, are split up into two registers.
3902 ArgValue =
3903 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3904 } else {
3905 const TargetRegisterClass *RC;
3906 if (RegVT == MVT::i8)
3907 RC = &X86::GR8RegClass;
3908 else if (RegVT == MVT::i16)
3909 RC = &X86::GR16RegClass;
3910 else if (RegVT == MVT::i32)
3911 RC = &X86::GR32RegClass;
3912 else if (Is64Bit && RegVT == MVT::i64)
3913 RC = &X86::GR64RegClass;
3914 else if (RegVT == MVT::f16)
3915 RC = &X86::FR16XRegClass;
3916 else if (RegVT == MVT::f32)
3917 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3918 else if (RegVT == MVT::f64)
3919 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3920 else if (RegVT == MVT::f80)
3921 RC = &X86::RFP80RegClass;
3922 else if (RegVT == MVT::f128)
3923 RC = &X86::VR128RegClass;
3924 else if (RegVT.is512BitVector())
3925 RC = &X86::VR512RegClass;
3926 else if (RegVT.is256BitVector())
3927 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3928 else if (RegVT.is128BitVector())
3929 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3930 else if (RegVT == MVT::x86mmx)
3931 RC = &X86::VR64RegClass;
3932 else if (RegVT == MVT::v1i1)
3933 RC = &X86::VK1RegClass;
3934 else if (RegVT == MVT::v8i1)
3935 RC = &X86::VK8RegClass;
3936 else if (RegVT == MVT::v16i1)
3937 RC = &X86::VK16RegClass;
3938 else if (RegVT == MVT::v32i1)
3939 RC = &X86::VK32RegClass;
3940 else if (RegVT == MVT::v64i1)
3941 RC = &X86::VK64RegClass;
3942 else
3943 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3943)
;
3944
3945 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3946 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3947 }
3948
3949 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3950 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3951 // right size.
3952 if (VA.getLocInfo() == CCValAssign::SExt)
3953 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3954 DAG.getValueType(VA.getValVT()));
3955 else if (VA.getLocInfo() == CCValAssign::ZExt)
3956 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3957 DAG.getValueType(VA.getValVT()));
3958 else if (VA.getLocInfo() == CCValAssign::BCvt)
3959 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3960
3961 if (VA.isExtInLoc()) {
3962 // Handle MMX values passed in XMM regs.
3963 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3964 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3965 else if (VA.getValVT().isVector() &&
3966 VA.getValVT().getScalarType() == MVT::i1 &&
3967 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3968 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3969 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3970 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3971 } else
3972 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3973 }
3974 } else {
3975 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
3975, __extension__ __PRETTY_FUNCTION__))
;
3976 ArgValue =
3977 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3978 }
3979
3980 // If value is passed via pointer - do a load.
3981 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3982 ArgValue =
3983 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3984
3985 InVals.push_back(ArgValue);
3986 }
3987
3988 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3989 if (Ins[I].Flags.isSwiftAsync()) {
3990 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
3991 if (Subtarget.is64Bit())
3992 X86FI->setHasSwiftAsyncContext(true);
3993 else {
3994 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
3995 X86FI->setSwiftAsyncContextFrameIdx(FI);
3996 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
3997 DAG.getFrameIndex(FI, MVT::i32),
3998 MachinePointerInfo::getFixedStack(MF, FI));
3999 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
4000 }
4001 }
4002
4003 // Swift calling convention does not require we copy the sret argument
4004 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
4005 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
4006 continue;
4007
4008 // All x86 ABIs require that for returning structs by value we copy the
4009 // sret argument into %rax/%eax (depending on ABI) for the return. Save
4010 // the argument into a virtual register so that we can access it from the
4011 // return points.
4012 if (Ins[I].Flags.isSRet()) {
4013 assert(!FuncInfo->getSRetReturnReg() &&(static_cast <bool> (!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set") ? void (0) : __assert_fail
("!FuncInfo->getSRetReturnReg() && \"SRet return has already been set\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4014, __extension__
__PRETTY_FUNCTION__))
4014 "SRet return has already been set")(static_cast <bool> (!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set") ? void (0) : __assert_fail
("!FuncInfo->getSRetReturnReg() && \"SRet return has already been set\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4014, __extension__
__PRETTY_FUNCTION__))
;
4015 MVT PtrTy = getPointerTy(DAG.getDataLayout());
4016 Register Reg =
4017 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
4018 FuncInfo->setSRetReturnReg(Reg);
4019 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
4020 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
4021 break;
4022 }
4023 }
4024
4025 unsigned StackSize = CCInfo.getNextStackOffset();
4026 // Align stack specially for tail calls.
4027 if (shouldGuaranteeTCO(CallConv,
4028 MF.getTarget().Options.GuaranteedTailCallOpt))
4029 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
4030
4031 if (IsVarArg)
4032 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
4033 .lowerVarArgsParameters(Chain, StackSize);
4034
4035 // Some CCs need callee pop.
4036 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
4037 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4038 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
4039 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
4040 // X86 interrupts must pop the error code (and the alignment padding) if
4041 // present.
4042 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
4043 } else {
4044 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
4045 // If this is an sret function, the return should pop the hidden pointer.
4046 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
4047 FuncInfo->setBytesToPopOnReturn(4);
4048 }
4049
4050 if (!Is64Bit) {
4051 // RegSaveFrameIndex is X86-64 only.
4052 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4053 }
4054
4055 FuncInfo->setArgumentStackSize(StackSize);
4056
4057 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
4058 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
4059 if (Personality == EHPersonality::CoreCLR) {
4060 assert(Is64Bit)(static_cast <bool> (Is64Bit) ? void (0) : __assert_fail
("Is64Bit", "llvm/lib/Target/X86/X86ISelLowering.cpp", 4060,
__extension__ __PRETTY_FUNCTION__))
;
4061 // TODO: Add a mechanism to frame lowering that will allow us to indicate
4062 // that we'd prefer this slot be allocated towards the bottom of the frame
4063 // (i.e. near the stack pointer after allocating the frame). Every
4064 // funclet needs a copy of this slot in its (mostly empty) frame, and the
4065 // offset from the bottom of this and each funclet's frame must be the
4066 // same, so the size of funclets' (mostly empty) frames is dictated by
4067 // how far this slot is from the bottom (since they allocate just enough
4068 // space to accommodate holding this slot at the correct offset).
4069 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
4070 EHInfo->PSPSymFrameIdx = PSPSymFI;
4071 }
4072 }
4073
4074 if (CallConv == CallingConv::X86_RegCall ||
4075 F.hasFnAttribute("no_caller_saved_registers")) {
4076 MachineRegisterInfo &MRI = MF.getRegInfo();
4077 for (std::pair<Register, Register> Pair : MRI.liveins())
4078 MRI.disableCalleeSavedRegister(Pair.first);
4079 }
4080
4081 return Chain;
4082}
4083
4084SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
4085 SDValue Arg, const SDLoc &dl,
4086 SelectionDAG &DAG,
4087 const CCValAssign &VA,
4088 ISD::ArgFlagsTy Flags,
4089 bool isByVal) const {
4090 unsigned LocMemOffset = VA.getLocMemOffset();
4091 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4092 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4093 StackPtr, PtrOff);
4094 if (isByVal)
4095 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
4096
4097 MaybeAlign Alignment;
4098 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
4099 Arg.getSimpleValueType() != MVT::f80)
4100 Alignment = MaybeAlign(4);
4101 return DAG.getStore(
4102 Chain, dl, Arg, PtrOff,
4103 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
4104 Alignment);
4105}
4106
4107/// Emit a load of return address if tail call
4108/// optimization is performed and it is required.
4109SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
4110 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
4111 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
4112 // Adjust the Return address stack slot.
4113 EVT VT = getPointerTy(DAG.getDataLayout());
4114 OutRetAddr = getReturnAddressFrameIndex(DAG);
4115
4116 // Load the "old" Return address.
4117 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
4118 return SDValue(OutRetAddr.getNode(), 1);
4119}
4120
4121/// Emit a store of the return address if tail call
4122/// optimization is performed and it is required (FPDiff!=0).
4123static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
4124 SDValue Chain, SDValue RetAddrFrIdx,
4125 EVT PtrVT, unsigned SlotSize,
4126 int FPDiff, const SDLoc &dl) {
4127 // Store the return address to the appropriate stack slot.
4128 if (!FPDiff) return Chain;
4129 // Calculate the new stack slot for the return address.
4130 int NewReturnAddrFI =
4131 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
4132 false);
4133 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
4134 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
4135 MachinePointerInfo::getFixedStack(
4136 DAG.getMachineFunction(), NewReturnAddrFI));
4137 return Chain;
4138}
4139
4140/// Returns a vector_shuffle mask for an movs{s|d}, movd
4141/// operation of specified width.
4142static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
4143 SDValue V2) {
4144 unsigned NumElems = VT.getVectorNumElements();
4145 SmallVector<int, 8> Mask;
4146 Mask.push_back(NumElems);
4147 for (unsigned i = 1; i != NumElems; ++i)
4148 Mask.push_back(i);
4149 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
4150}
4151
4152SDValue
4153X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4154 SmallVectorImpl<SDValue> &InVals) const {
4155 SelectionDAG &DAG = CLI.DAG;
4156 SDLoc &dl = CLI.DL;
4157 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4158 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4159 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4160 SDValue Chain = CLI.Chain;
4161 SDValue Callee = CLI.Callee;
4162 CallingConv::ID CallConv = CLI.CallConv;
4163 bool &isTailCall = CLI.IsTailCall;
4164 bool isVarArg = CLI.IsVarArg;
4165 const auto *CB = CLI.CB;
4166
4167 MachineFunction &MF = DAG.getMachineFunction();
4168 bool Is64Bit = Subtarget.is64Bit();
4169 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4170 bool IsSibcall = false;
4171 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
4172 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
4173 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
4174 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
4175 bool HasNCSR = (CB && isa<CallInst>(CB) &&
4176 CB->hasFnAttr("no_caller_saved_registers"));
4177 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
4178 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
4179 const Module *M = MF.getMMI().getModule();
4180 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
4181
4182 MachineFunction::CallSiteInfo CSInfo;
4183 if (CallConv == CallingConv::X86_INTR)
4184 report_fatal_error("X86 interrupts may not be called directly");
4185
4186 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
4187 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
4188 // If we are using a GOT, disable tail calls to external symbols with
4189 // default visibility. Tail calling such a symbol requires using a GOT
4190 // relocation, which forces early binding of the symbol. This breaks code
4191 // that require lazy function symbol resolution. Using musttail or
4192 // GuaranteedTailCallOpt will override this.
4193 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4194 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
4195 G->getGlobal()->hasDefaultVisibility()))
4196 isTailCall = false;
4197 }
4198
4199 if (isTailCall && !IsMustTail) {
4200 // Check if it's really possible to do a tail call.
4201 isTailCall = IsEligibleForTailCallOptimization(
4202 Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
4203 Ins, DAG);
4204
4205 // Sibcalls are automatically detected tailcalls which do not require
4206 // ABI changes.
4207 if (!IsGuaranteeTCO && isTailCall)
4208 IsSibcall = true;
4209
4210 if (isTailCall)
4211 ++NumTailCalls;
4212 }
4213
4214 if (IsMustTail && !isTailCall)
4215 report_fatal_error("failed to perform tail call elimination on a call "
4216 "site marked musttail");
4217
4218 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4219, __extension__
__PRETTY_FUNCTION__))
4219 "Var args not supported with calling convention fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4219, __extension__
__PRETTY_FUNCTION__))
;
4220
4221 // Analyze operands of the call, assigning locations to each operand.
4222 SmallVector<CCValAssign, 16> ArgLocs;
4223 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4224
4225 // Allocate shadow area for Win64.
4226 if (IsWin64)
4227 CCInfo.AllocateStack(32, Align(8));
4228
4229 CCInfo.AnalyzeArguments(Outs, CC_X86);
4230
4231 // In vectorcall calling convention a second pass is required for the HVA
4232 // types.
4233 if (CallingConv::X86_VectorCall == CallConv) {
4234 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
4235 }
4236
4237 // Get a count of how many bytes are to be pushed on the stack.
4238 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
4239 if (IsSibcall)
4240 // This is a sibcall. The memory operands are available in caller's
4241 // own caller's stack.
4242 NumBytes = 0;
4243 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
4244 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
4245
4246 int FPDiff = 0;
4247 if (isTailCall &&
4248 shouldGuaranteeTCO(CallConv,
4249 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4250 // Lower arguments at fp - stackoffset + fpdiff.
4251 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
4252
4253 FPDiff = NumBytesCallerPushed - NumBytes;
4254
4255 // Set the delta of movement of the returnaddr stackslot.
4256 // But only set if delta is greater than previous delta.
4257 if (FPDiff < X86Info->getTCReturnAddrDelta())
4258 X86Info->setTCReturnAddrDelta(FPDiff);
4259 }
4260
4261 unsigned NumBytesToPush = NumBytes;
4262 unsigned NumBytesToPop = NumBytes;
4263
4264 // If we have an inalloca argument, all stack space has already been allocated
4265 // for us and be right at the top of the stack. We don't support multiple
4266 // arguments passed in memory when using inalloca.
4267 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4268 NumBytesToPush = 0;
4269 if (!ArgLocs.back().isMemLoc())
4270 report_fatal_error("cannot use inalloca attribute on a register "
4271 "parameter");
4272 if (ArgLocs.back().getLocMemOffset() != 0)
4273 report_fatal_error("any parameter with the inalloca attribute must be "
4274 "the only memory argument");
4275 } else if (CLI.IsPreallocated) {
4276 assert(ArgLocs.back().isMemLoc() &&(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4278, __extension__
__PRETTY_FUNCTION__))
4277 "cannot use preallocated attribute on a register "(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4278, __extension__
__PRETTY_FUNCTION__))
4278 "parameter")(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4278, __extension__
__PRETTY_FUNCTION__))
;
4279 SmallVector<size_t, 4> PreallocatedOffsets;
4280 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4281 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4282 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4283 }
4284 }
4285 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4286 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4287 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4288 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4289 NumBytesToPush = 0;
4290 }
4291
4292 if (!IsSibcall && !IsMustTail)
4293 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4294 NumBytes - NumBytesToPush, dl);
4295
4296 SDValue RetAddrFrIdx;
4297 // Load return address for tail calls.
4298 if (isTailCall && FPDiff)
4299 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4300 Is64Bit, FPDiff, dl);
4301
4302 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4303 SmallVector<SDValue, 8> MemOpChains;
4304 SDValue StackPtr;
4305
4306 // The next loop assumes that the locations are in the same order of the
4307 // input arguments.
4308 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4309, __extension__
__PRETTY_FUNCTION__))
4309 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4309, __extension__
__PRETTY_FUNCTION__))
;
4310
4311 // Walk the register/memloc assignments, inserting copies/loads. In the case
4312 // of tail call optimization, arguments are handled later.
4313 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4314 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4315 ++I, ++OutIndex) {
4316 assert(OutIndex < Outs.size() && "Invalid Out index")(static_cast <bool> (OutIndex < Outs.size() &&
"Invalid Out index") ? void (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4316, __extension__
__PRETTY_FUNCTION__))
;
4317 // Skip inalloca/preallocated arguments, they have already been written.
4318 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4319 if (Flags.isInAlloca() || Flags.isPreallocated())
4320 continue;
4321
4322 CCValAssign &VA = ArgLocs[I];
4323 EVT RegVT = VA.getLocVT();
4324 SDValue Arg = OutVals[OutIndex];
4325 bool isByVal = Flags.isByVal();
4326
4327 // Promote the value if needed.
4328 switch (VA.getLocInfo()) {
4329 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4329)
;
4330 case CCValAssign::Full: break;
4331 case CCValAssign::SExt:
4332 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4333 break;
4334 case CCValAssign::ZExt:
4335 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4336 break;
4337 case CCValAssign::AExt:
4338 if (Arg.getValueType().isVector() &&
4339 Arg.getValueType().getVectorElementType() == MVT::i1)
4340 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4341 else if (RegVT.is128BitVector()) {
4342 // Special case: passing MMX values in XMM registers.
4343 Arg = DAG.getBitcast(MVT::i64, Arg);
4344 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4345 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4346 } else
4347 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4348 break;
4349 case CCValAssign::BCvt:
4350 Arg = DAG.getBitcast(RegVT, Arg);
4351 break;
4352 case CCValAssign::Indirect: {
4353 if (isByVal) {
4354 // Memcpy the argument to a temporary stack slot to prevent
4355 // the caller from seeing any modifications the callee may make
4356 // as guaranteed by the `byval` attribute.
4357 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4358 Flags.getByValSize(),
4359 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4360 SDValue StackSlot =
4361 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4362 Chain =
4363 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4364 // From now on treat this as a regular pointer
4365 Arg = StackSlot;
4366 isByVal = false;
4367 } else {
4368 // Store the argument.
4369 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4370 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4371 Chain = DAG.getStore(
4372 Chain, dl, Arg, SpillSlot,
4373 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4374 Arg = SpillSlot;
4375 }
4376 break;
4377 }
4378 }
4379
4380 if (VA.needsCustom()) {
4381 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4382, __extension__
__PRETTY_FUNCTION__))
4382 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4382, __extension__
__PRETTY_FUNCTION__))
;
4383 // Split v64i1 value into two registers
4384 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4385 } else if (VA.isRegLoc()) {
4386 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4387 const TargetOptions &Options = DAG.getTarget().Options;
4388 if (Options.EmitCallSiteInfo)
4389 CSInfo.emplace_back(VA.getLocReg(), I);
4390 if (isVarArg && IsWin64) {
4391 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4392 // shadow reg if callee is a varargs function.
4393 Register ShadowReg;
4394 switch (VA.getLocReg()) {
4395 case X86::XMM0: ShadowReg = X86::RCX; break;
4396 case X86::XMM1: ShadowReg = X86::RDX; break;
4397 case X86::XMM2: ShadowReg = X86::R8; break;
4398 case X86::XMM3: ShadowReg = X86::R9; break;
4399 }
4400 if (ShadowReg)
4401 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4402 }
4403 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4404 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4404, __extension__ __PRETTY_FUNCTION__))
;
4405 if (!StackPtr.getNode())
4406 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4407 getPointerTy(DAG.getDataLayout()));
4408 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4409 dl, DAG, VA, Flags, isByVal));
4410 }
4411 }
4412
4413 if (!MemOpChains.empty())
4414 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4415
4416 if (Subtarget.isPICStyleGOT()) {
4417 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4418 // GOT pointer (except regcall).
4419 if (!isTailCall) {
4420 // Indirect call with RegCall calling convention may use up all the
4421 // general registers, so it is not suitable to bind EBX register for
4422 // GOT address, just let register allocator handle it.
4423 if (CallConv != CallingConv::X86_RegCall)
4424 RegsToPass.push_back(std::make_pair(
4425 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4426 getPointerTy(DAG.getDataLayout()))));
4427 } else {
4428 // If we are tail calling and generating PIC/GOT style code load the
4429 // address of the callee into ECX. The value in ecx is used as target of
4430 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4431 // for tail calls on PIC/GOT architectures. Normally we would just put the
4432 // address of GOT into ebx and then call target@PLT. But for tail calls
4433 // ebx would be restored (since ebx is callee saved) before jumping to the
4434 // target@PLT.
4435
4436 // Note: The actual moving to ECX is done further down.
4437 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4438 if (G && !G->getGlobal()->hasLocalLinkage() &&
4439 G->getGlobal()->hasDefaultVisibility())
4440 Callee = LowerGlobalAddress(Callee, DAG);
4441 else if (isa<ExternalSymbolSDNode>(Callee))
4442 Callee = LowerExternalSymbol(Callee, DAG);
4443 }
4444 }
4445
4446 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
4447 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
4448 // From AMD64 ABI document:
4449 // For calls that may call functions that use varargs or stdargs
4450 // (prototype-less calls or calls to functions containing ellipsis (...) in
4451 // the declaration) %al is used as hidden argument to specify the number
4452 // of SSE registers used. The contents of %al do not need to match exactly
4453 // the number of registers, but must be an ubound on the number of SSE
4454 // registers used and is in the range 0 - 8 inclusive.
4455
4456 // Count the number of XMM registers allocated.
4457 static const MCPhysReg XMMArgRegs[] = {
4458 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4459 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4460 };
4461 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4462 assert((Subtarget.hasSSE1() || !NumXMMRegs)(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4463, __extension__
__PRETTY_FUNCTION__))
4463 && "SSE registers cannot be used when SSE is disabled")(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4463, __extension__
__PRETTY_FUNCTION__))
;
4464 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4465 DAG.getConstant(NumXMMRegs, dl,
4466 MVT::i8)));
4467 }
4468
4469 if (isVarArg && IsMustTail) {
4470 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4471 for (const auto &F : Forwards) {
4472 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4473 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4474 }
4475 }
4476
4477 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4478 // don't need this because the eligibility check rejects calls that require
4479 // shuffling arguments passed in memory.
4480 if (!IsSibcall && isTailCall) {
4481 // Force all the incoming stack arguments to be loaded from the stack
4482 // before any new outgoing arguments are stored to the stack, because the
4483 // outgoing stack slots may alias the incoming argument stack slots, and
4484 // the alias isn't otherwise explicit. This is slightly more conservative
4485 // than necessary, because it means that each store effectively depends
4486 // on every argument instead of just those arguments it would clobber.
4487 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4488
4489 SmallVector<SDValue, 8> MemOpChains2;
4490 SDValue FIN;
4491 int FI = 0;
4492 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4493 ++I, ++OutsIndex) {
4494 CCValAssign &VA = ArgLocs[I];
4495
4496 if (VA.isRegLoc()) {
4497 if (VA.needsCustom()) {
4498 assert((CallConv == CallingConv::X86_RegCall) &&(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4499, __extension__
__PRETTY_FUNCTION__))
4499 "Expecting custom case only in regcall calling convention")(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4499, __extension__
__PRETTY_FUNCTION__))
;
4500 // This means that we are in special case where one argument was
4501 // passed through two register locations - Skip the next location
4502 ++I;
4503 }
4504
4505 continue;
4506 }
4507
4508 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4508, __extension__ __PRETTY_FUNCTION__))
;
4509 SDValue Arg = OutVals[OutsIndex];
4510 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4511 // Skip inalloca/preallocated arguments. They don't require any work.
4512 if (Flags.isInAlloca() || Flags.isPreallocated())
4513 continue;
4514 // Create frame index.
4515 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4516 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4517 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4518 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4519
4520 if (Flags.isByVal()) {
4521 // Copy relative to framepointer.
4522 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4523 if (!StackPtr.getNode())
4524 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4525 getPointerTy(DAG.getDataLayout()));
4526 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4527 StackPtr, Source);
4528
4529 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4530 ArgChain,
4531 Flags, DAG, dl));
4532 } else {
4533 // Store relative to framepointer.
4534 MemOpChains2.push_back(DAG.getStore(
4535 ArgChain, dl, Arg, FIN,
4536 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4537 }
4538 }
4539
4540 if (!MemOpChains2.empty())
4541 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4542
4543 // Store the return address to the appropriate stack slot.
4544 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4545 getPointerTy(DAG.getDataLayout()),
4546 RegInfo->getSlotSize(), FPDiff, dl);
4547 }
4548
4549 // Build a sequence of copy-to-reg nodes chained together with token chain
4550 // and flag operands which copy the outgoing args into registers.
4551 SDValue InFlag;
4552 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4553 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4554 RegsToPass[i].second, InFlag);
4555 InFlag = Chain.getValue(1);
4556 }
4557
4558 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4559 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")(static_cast <bool> (Is64Bit && "Large code model is only legal in 64-bit mode."
) ? void (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4559, __extension__
__PRETTY_FUNCTION__))
;
4560 // In the 64-bit large code model, we have to make all calls
4561 // through a register, since the call instruction's 32-bit
4562 // pc-relative offset may not be large enough to hold the whole
4563 // address.
4564 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4565 Callee->getOpcode() == ISD::ExternalSymbol) {
4566 // Lower direct calls to global addresses and external symbols. Setting
4567 // ForCall to true here has the effect of removing WrapperRIP when possible
4568 // to allow direct calls to be selected without first materializing the
4569 // address into a register.
4570 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4571 } else if (Subtarget.isTarget64BitILP32() &&
4572 Callee.getValueType() == MVT::i32) {
4573 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4574 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4575 }
4576
4577 // Returns a chain & a flag for retval copy to use.
4578 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4579 SmallVector<SDValue, 8> Ops;
4580
4581 if (!IsSibcall && isTailCall && !IsMustTail) {
4582 Chain = DAG.getCALLSEQ_END(Chain,
4583 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4584 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4585 InFlag = Chain.getValue(1);
4586 }
4587
4588 Ops.push_back(Chain);
4589 Ops.push_back(Callee);
4590
4591 if (isTailCall)
4592 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4593
4594 // Add argument registers to the end of the list so that they are known live
4595 // into the call.
4596 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4597 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4598 RegsToPass[i].second.getValueType()));
4599
4600 // Add a register mask operand representing the call-preserved registers.
4601 const uint32_t *Mask = [&]() {
4602 auto AdaptedCC = CallConv;
4603 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
4604 // use X86_INTR calling convention because it has the same CSR mask
4605 // (same preserved registers).
4606 if (HasNCSR)
4607 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
4608 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
4609 // to use the CSR_NoRegs_RegMask.
4610 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
4611 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
4612 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
4613 }();
4614 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4614, __extension__
__PRETTY_FUNCTION__))
;
4615
4616 // If this is an invoke in a 32-bit function using a funclet-based
4617 // personality, assume the function clobbers all registers. If an exception
4618 // is thrown, the runtime will not restore CSRs.
4619 // FIXME: Model this more precisely so that we can register allocate across
4620 // the normal edge and spill and fill across the exceptional edge.
4621 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4622 const Function &CallerFn = MF.getFunction();
4623 EHPersonality Pers =
4624 CallerFn.hasPersonalityFn()
4625 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4626 : EHPersonality::Unknown;
4627 if (isFuncletEHPersonality(Pers))
4628 Mask = RegInfo->getNoPreservedMask();
4629 }
4630
4631 // Define a new register mask from the existing mask.
4632 uint32_t *RegMask = nullptr;
4633
4634 // In some calling conventions we need to remove the used physical registers
4635 // from the reg mask.
4636 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4637 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4638
4639 // Allocate a new Reg Mask and copy Mask.
4640 RegMask = MF.allocateRegMask();
4641 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4642 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4643
4644 // Make sure all sub registers of the argument registers are reset
4645 // in the RegMask.
4646 for (auto const &RegPair : RegsToPass)
4647 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4648 SubRegs.isValid(); ++SubRegs)
4649 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4650
4651 // Create the RegMask Operand according to our updated mask.
4652 Ops.push_back(DAG.getRegisterMask(RegMask));
4653 } else {
4654 // Create the RegMask Operand according to the static mask.
4655 Ops.push_back(DAG.getRegisterMask(Mask));
4656 }
4657
4658 if (InFlag.getNode())
4659 Ops.push_back(InFlag);
4660
4661 if (isTailCall) {
4662 // We used to do:
4663 //// If this is the first return lowered for this function, add the regs
4664 //// to the liveout set for the function.
4665 // This isn't right, although it's probably harmless on x86; liveouts
4666 // should be computed from returns not tail calls. Consider a void
4667 // function making a tail call to a function returning int.
4668 MF.getFrameInfo().setHasTailCall();
4669 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4670 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4671 return Ret;
4672 }
4673
4674 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4675 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4676 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4677 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
4678 // expanded to the call, directly followed by a special marker sequence and
4679 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
4680 assert(!isTailCall &&(static_cast <bool> (!isTailCall && "tail calls cannot be marked with clang.arc.attachedcall"
) ? void (0) : __assert_fail ("!isTailCall && \"tail calls cannot be marked with clang.arc.attachedcall\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4681, __extension__
__PRETTY_FUNCTION__))
4681 "tail calls cannot be marked with clang.arc.attachedcall")(static_cast <bool> (!isTailCall && "tail calls cannot be marked with clang.arc.attachedcall"
) ? void (0) : __assert_fail ("!isTailCall && \"tail calls cannot be marked with clang.arc.attachedcall\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4681, __extension__
__PRETTY_FUNCTION__))
;
4682 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode")(static_cast <bool> (Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode"
) ? void (0) : __assert_fail ("Is64Bit && \"clang.arc.attachedcall is only supported in 64bit mode\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4682, __extension__
__PRETTY_FUNCTION__))
;
4683
4684 // Add a target global address for the retainRV/claimRV runtime function
4685 // just before the call target.
4686 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
4687 auto PtrVT = getPointerTy(DAG.getDataLayout());
4688 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
4689 Ops.insert(Ops.begin() + 1, GA);
4690 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
4691 } else {
4692 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4693 }
4694
4695 InFlag = Chain.getValue(1);
4696 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4697 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4698
4699 // Save heapallocsite metadata.
4700 if (CLI.CB)
4701 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4702 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4703
4704 // Create the CALLSEQ_END node.
4705 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
4706 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4707 DAG.getTarget().Options.GuaranteedTailCallOpt))
4708 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4709 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
4710 // If this call passes a struct-return pointer, the callee
4711 // pops that struct pointer.
4712 NumBytesForCalleeToPop = 4;
4713
4714 // Returns a flag for retval copy to use.
4715 if (!IsSibcall) {
4716 Chain = DAG.getCALLSEQ_END(Chain,
4717 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4718 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4719 true),
4720 InFlag, dl);
4721 InFlag = Chain.getValue(1);
4722 }
4723
4724 // Handle result values, copying them out of physregs into vregs that we
4725 // return.
4726 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4727 InVals, RegMask);
4728}
4729
4730//===----------------------------------------------------------------------===//
4731// Fast Calling Convention (tail call) implementation
4732//===----------------------------------------------------------------------===//
4733
4734// Like stdcall, the callee cleans up the arguments, except that ECX is
4735// reserved for storing the tail called function address. Only 2 registers are
4736// free for argument passing (inreg). Tail call optimization is performed
4737// provided:
4738// * tailcallopt is enabled
4739// * caller/callee are fastcc
4740// On X86_64 architecture with GOT-style position independent code only local
4741// (within module) calls are supported at the moment.
4742// To keep the stack aligned according to platform abi the function
4743// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4744// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
4745// If a tail called function callee has more arguments than the caller the
4746// caller needs to make sure that there is room to move the RETADDR to. This is
4747// achieved by reserving an area the size of the argument delta right after the
4748// original RETADDR, but before the saved framepointer or the spilled registers
4749// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4750// stack layout:
4751// arg1
4752// arg2
4753// RETADDR
4754// [ new RETADDR
4755// move area ]
4756// (possible EBP)
4757// ESI
4758// EDI
4759// local1 ..
4760
4761/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4762/// requirement.
4763unsigned
4764X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4765 SelectionDAG &DAG) const {
4766 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4767 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4768 assert(StackSize % SlotSize == 0 &&(static_cast <bool> (StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize") ? void (0) : __assert_fail
("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4769, __extension__
__PRETTY_FUNCTION__))
4769 "StackSize must be a multiple of SlotSize")(static_cast <bool> (StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize") ? void (0) : __assert_fail
("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 4769, __extension__
__PRETTY_FUNCTION__))
;
4770 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4771}
4772
4773/// Return true if the given stack call argument is already available in the
4774/// same position (relatively) of the caller's incoming argument stack.
4775static
4776bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4777 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4778 const X86InstrInfo *TII, const CCValAssign &VA) {
4779 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4780
4781 for (;;) {
4782 // Look through nodes that don't alter the bits of the incoming value.
4783 unsigned Op = Arg.getOpcode();
4784 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4785 Arg = Arg.getOperand(0);
4786 continue;
4787 }
4788 if (Op == ISD::TRUNCATE) {
4789 const SDValue &TruncInput = Arg.getOperand(0);
4790 if (TruncInput.getOpcode() == ISD::AssertZext &&
4791 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4792 Arg.getValueType()) {
4793 Arg = TruncInput.getOperand(0);
4794 continue;
4795 }
4796 }
4797 break;
4798 }
4799
4800 int FI = INT_MAX2147483647;
4801 if (Arg.getOpcode() == ISD::CopyFromReg) {
4802 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4803 if (!VR.isVirtual())
4804 return false;
4805 MachineInstr *Def = MRI->getVRegDef(VR);
4806 if (!Def)
4807 return false;
4808 if (!Flags.isByVal()) {
4809 if (!TII->isLoadFromStackSlot(*Def, FI))
4810 return false;
4811 } else {
4812 unsigned Opcode = Def->getOpcode();
4813 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4814 Opcode == X86::LEA64_32r) &&
4815 Def->getOperand(1).isFI()) {
4816 FI = Def->getOperand(1).getIndex();
4817 Bytes = Flags.getByValSize();
4818 } else
4819 return false;
4820 }
4821 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4822 if (Flags.isByVal())
4823 // ByVal argument is passed in as a pointer but it's now being
4824 // dereferenced. e.g.
4825 // define @foo(%struct.X* %A) {
4826 // tail call @bar(%struct.X* byval %A)
4827 // }
4828 return false;
4829 SDValue Ptr = Ld->getBasePtr();
4830 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4831 if (!FINode)
4832 return false;
4833 FI = FINode->getIndex();
4834 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4835 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4836 FI = FINode->getIndex();
4837 Bytes = Flags.getByValSize();
4838 } else
4839 return false;
4840
4841 assert(FI != INT_MAX)(static_cast <bool> (FI != 2147483647) ? void (0) : __assert_fail
("FI != INT_MAX", "llvm/lib/Target/X86/X86ISelLowering.cpp",
4841, __extension__ __PRETTY_FUNCTION__))
;
4842 if (!MFI.isFixedObjectIndex(FI))
4843 return false;
4844
4845 if (Offset != MFI.getObjectOffset(FI))
4846 return false;
4847
4848 // If this is not byval, check that the argument stack object is immutable.
4849 // inalloca and argument copy elision can create mutable argument stack
4850 // objects. Byval objects can be mutated, but a byval call intends to pass the
4851 // mutated memory.
4852 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4853 return false;
4854
4855 if (VA.getLocVT().getFixedSizeInBits() >
4856 Arg.getValueSizeInBits().getFixedSize()) {
4857 // If the argument location is wider than the argument type, check that any
4858 // extension flags match.
4859 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4860 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4861 return false;
4862 }
4863 }
4864
4865 return Bytes == MFI.getObjectSize(FI);
4866}
4867
4868/// Check whether the call is eligible for tail call optimization. Targets
4869/// that want to do tail call optimization should implement this function.
4870bool X86TargetLowering::IsEligibleForTailCallOptimization(
4871 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
4872 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
4873 const SmallVectorImpl<SDValue> &OutVals,
4874 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4875 if (!mayTailCallThisCC(CalleeCC))
4876 return false;
4877
4878 // If -tailcallopt is specified, make fastcc functions tail-callable.
4879 MachineFunction &MF = DAG.getMachineFunction();
4880 const Function &CallerF = MF.getFunction();
4881
4882 // If the function return type is x86_fp80 and the callee return type is not,
4883 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4884 // perform a tailcall optimization here.
4885 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4886 return false;
4887
4888 CallingConv::ID CallerCC = CallerF.getCallingConv();
4889 bool CCMatch = CallerCC == CalleeCC;
4890 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4891 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4892 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4893 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
4894
4895 // Win64 functions have extra shadow space for argument homing. Don't do the
4896 // sibcall if the caller and callee have mismatched expectations for this
4897 // space.
4898 if (IsCalleeWin64 != IsCallerWin64)
4899 return false;
4900
4901 if (IsGuaranteeTCO) {
4902 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4903 return true;
4904 return false;
4905 }
4906
4907 // Look for obvious safe cases to perform tail call optimization that do not
4908 // require ABI changes. This is what gcc calls sibcall.
4909
4910 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4911 // emit a special epilogue.
4912 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4913 if (RegInfo->hasStackRealignment(MF))
4914 return false;
4915
4916 // Also avoid sibcall optimization if we're an sret return fn and the callee
4917 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
4918 // insufficient.
4919 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
4920 // For a compatible tail call the callee must return our sret pointer. So it
4921 // needs to be (a) an sret function itself and (b) we pass our sret as its
4922 // sret. Condition #b is harder to determine.
4923 return false;
4924 } else if (IsCalleePopSRet)
4925 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
4926 // expect that.
4927 return false;
4928
4929 // Do not sibcall optimize vararg calls unless all arguments are passed via
4930 // registers.
4931 LLVMContext &C = *DAG.getContext();
4932 if (isVarArg && !Outs.empty()) {
4933 // Optimizing for varargs on Win64 is unlikely to be safe without
4934 // additional testing.
4935 if (IsCalleeWin64 || IsCallerWin64)
4936 return false;
4937
4938 SmallVector<CCValAssign, 16> ArgLocs;
4939 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4940
4941 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4942 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4943 if (!ArgLocs[i].isRegLoc())
4944 return false;
4945 }
4946
4947 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4948 // stack. Therefore, if it's not used by the call it is not safe to optimize
4949 // this into a sibcall.
4950 bool Unused = false;
4951 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4952 if (!Ins[i].Used) {
4953 Unused = true;
4954 break;
4955 }
4956 }
4957 if (Unused) {
4958 SmallVector<CCValAssign, 16> RVLocs;
4959 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4960 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4961 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4962 CCValAssign &VA = RVLocs[i];
4963 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4964 return false;
4965 }
4966 }
4967
4968 // Check that the call results are passed in the same way.
4969 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4970 RetCC_X86, RetCC_X86))
4971 return false;
4972 // The callee has to preserve all registers the caller needs to preserve.
4973 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4974 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4975 if (!CCMatch) {
4976 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4977 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4978 return false;
4979 }
4980
4981 unsigned StackArgsSize = 0;
4982
4983 // If the callee takes no arguments then go on to check the results of the
4984 // call.
4985 if (!Outs.empty()) {
4986 // Check if stack adjustment is needed. For now, do not do this if any
4987 // argument is passed on the stack.
4988 SmallVector<CCValAssign, 16> ArgLocs;
4989 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4990
4991 // Allocate shadow area for Win64
4992 if (IsCalleeWin64)
4993 CCInfo.AllocateStack(32, Align(8));
4994
4995 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4996 StackArgsSize = CCInfo.getNextStackOffset();
4997
4998 if (CCInfo.getNextStackOffset()) {
4999 // Check if the arguments are already laid out in the right way as
5000 // the caller's fixed stack objects.
5001 MachineFrameInfo &MFI = MF.getFrameInfo();
5002 const MachineRegisterInfo *MRI = &MF.getRegInfo();
5003 const X86InstrInfo *TII = Subtarget.getInstrInfo();
5004 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5005 CCValAssign &VA = ArgLocs[i];
5006 SDValue Arg = OutVals[i];
5007 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5008 if (VA.getLocInfo() == CCValAssign::Indirect)
5009 return false;
5010 if (!VA.isRegLoc()) {
5011 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
5012 MFI, MRI, TII, VA))
5013 return false;
5014 }
5015 }
5016 }
5017
5018 bool PositionIndependent = isPositionIndependent();
5019 // If the tailcall address may be in a register, then make sure it's
5020 // possible to register allocate for it. In 32-bit, the call address can
5021 // only target EAX, EDX, or ECX since the tail call must be scheduled after
5022 // callee-saved registers are restored. These happen to be the same
5023 // registers used to pass 'inreg' arguments so watch out for those.
5024 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
5025 !isa<ExternalSymbolSDNode>(Callee)) ||
5026 PositionIndependent)) {
5027 unsigned NumInRegs = 0;
5028 // In PIC we need an extra register to formulate the address computation
5029 // for the callee.
5030 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
5031
5032 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5033 CCValAssign &VA = ArgLocs[i];
5034 if (!VA.isRegLoc())
5035 continue;
5036 Register Reg = VA.getLocReg();
5037 switch (Reg) {
5038 default: break;
5039 case X86::EAX: case X86::EDX: case X86::ECX:
5040 if (++NumInRegs == MaxInRegs)
5041 return false;
5042 break;
5043 }
5044 }
5045 }
5046
5047 const MachineRegisterInfo &MRI = MF.getRegInfo();
5048 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5049 return false;
5050 }
5051
5052 bool CalleeWillPop =
5053 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
5054 MF.getTarget().Options.GuaranteedTailCallOpt);
5055
5056 if (unsigned BytesToPop =
5057 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
5058 // If we have bytes to pop, the callee must pop them.
5059 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
5060 if (!CalleePopMatches)
5061 return false;
5062 } else if (CalleeWillPop && StackArgsSize > 0) {
5063 // If we don't have bytes to pop, make sure the callee doesn't pop any.
5064 return false;
5065 }
5066
5067 return true;
5068}
5069
5070FastISel *
5071X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
5072 const TargetLibraryInfo *libInfo) const {
5073 return X86::createFastISel(funcInfo, libInfo);
5074}
5075
5076//===----------------------------------------------------------------------===//
5077// Other Lowering Hooks
5078//===----------------------------------------------------------------------===//
5079
5080bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
5081 bool AssumeSingleUse) {
5082 if (!AssumeSingleUse && !Op.hasOneUse())
5083 return false;
5084 if (!ISD::isNormalLoad(Op.getNode()))
5085 return false;
5086
5087 // If this is an unaligned vector, make sure the target supports folding it.
5088 auto *Ld = cast<LoadSDNode>(Op.getNode());
5089 if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
5090 Ld->getValueSizeInBits(0) == 128 && Ld->getAlignment() < 16)
5091 return false;
5092
5093 // TODO: If this is a non-temporal load and the target has an instruction
5094 // for it, it should not be folded. See "useNonTemporalLoad()".
5095
5096 return true;
5097}
5098
5099bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
5100 const X86Subtarget &Subtarget,
5101 bool AssumeSingleUse) {
5102 assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory")(static_cast <bool> (Subtarget.hasAVX() && "Expected AVX for broadcast from memory"
) ? void (0) : __assert_fail ("Subtarget.hasAVX() && \"Expected AVX for broadcast from memory\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5102, __extension__
__PRETTY_FUNCTION__))
;
5103 if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
5104 return false;
5105
5106 // We can not replace a wide volatile load with a broadcast-from-memory,
5107 // because that would narrow the load, which isn't legal for volatiles.
5108 auto *Ld = cast<LoadSDNode>(Op.getNode());
5109 return !Ld->isVolatile() ||
5110 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
5111}
5112
5113bool X86::mayFoldIntoStore(SDValue Op) {
5114 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
5115}
5116
5117bool X86::mayFoldIntoZeroExtend(SDValue Op) {
5118 if (Op.hasOneUse()) {
5119 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
5120 return (ISD::ZERO_EXTEND == Opcode);
5121 }
5122 return false;
5123}
5124
5125static bool isTargetShuffle(unsigned Opcode) {
5126 switch(Opcode) {
5127 default: return false;
5128 case X86ISD::BLENDI:
5129 case X86ISD::PSHUFB:
5130 case X86ISD::PSHUFD:
5131 case X86ISD::PSHUFHW:
5132 case X86ISD::PSHUFLW:
5133 case X86ISD::SHUFP:
5134 case X86ISD::INSERTPS:
5135 case X86ISD::EXTRQI:
5136 case X86ISD::INSERTQI:
5137 case X86ISD::VALIGN:
5138 case X86ISD::PALIGNR:
5139 case X86ISD::VSHLDQ:
5140 case X86ISD::VSRLDQ:
5141 case X86ISD::MOVLHPS:
5142 case X86ISD::MOVHLPS:
5143 case X86ISD::MOVSHDUP:
5144 case X86ISD::MOVSLDUP:
5145 case X86ISD::MOVDDUP:
5146 case X86ISD::MOVSS:
5147 case X86ISD::MOVSD:
5148 case X86ISD::MOVSH:
5149 case X86ISD::UNPCKL:
5150 case X86ISD::UNPCKH:
5151 case X86ISD::VBROADCAST:
5152 case X86ISD::VPERMILPI:
5153 case X86ISD::VPERMILPV:
5154 case X86ISD::VPERM2X128:
5155 case X86ISD::SHUF128:
5156 case X86ISD::VPERMIL2:
5157 case X86ISD::VPERMI:
5158 case X86ISD::VPPERM:
5159 case X86ISD::VPERMV:
5160 case X86ISD::VPERMV3:
5161 case X86ISD::VZEXT_MOVL:
5162 return true;
5163 }
5164}
5165
5166static bool isTargetShuffleVariableMask(unsigned Opcode) {
5167 switch (Opcode) {
5168 default: return false;
5169 // Target Shuffles.
5170 case X86ISD::PSHUFB:
5171 case X86ISD::VPERMILPV:
5172 case X86ISD::VPERMIL2:
5173 case X86ISD::VPPERM:
5174 case X86ISD::VPERMV:
5175 case X86ISD::VPERMV3:
5176 return true;
5177 // 'Faux' Target Shuffles.
5178 case ISD::OR:
5179 case ISD::AND:
5180 case X86ISD::ANDNP:
5181 return true;
5182 }
5183}
5184
5185static bool isTargetShuffleSplat(SDValue Op) {
5186 unsigned Opcode = Op.getOpcode();
5187 if (Opcode == ISD::EXTRACT_SUBVECTOR)
5188 return isTargetShuffleSplat(Op.getOperand(0));
5189 return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
5190}
5191
5192SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
5193 MachineFunction &MF = DAG.getMachineFunction();
5194 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5195 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
5196 int ReturnAddrIndex = FuncInfo->getRAIndex();
5197
5198 if (ReturnAddrIndex == 0) {
5199 // Set up a frame object for the return address.
5200 unsigned SlotSize = RegInfo->getSlotSize();
5201 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
5202 -(int64_t)SlotSize,
5203 false);
5204 FuncInfo->setRAIndex(ReturnAddrIndex);
5205 }
5206
5207 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
5208}
5209
5210bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
5211 bool hasSymbolicDisplacement) {
5212 // Offset should fit into 32 bit immediate field.
5213 if (!isInt<32>(Offset))
5214 return false;
5215
5216 // If we don't have a symbolic displacement - we don't have any extra
5217 // restrictions.
5218 if (!hasSymbolicDisplacement)
5219 return true;
5220
5221 // FIXME: Some tweaks might be needed for medium code model.
5222 if (M != CodeModel::Small && M != CodeModel::Kernel)
5223 return false;
5224
5225 // For small code model we assume that latest object is 16MB before end of 31
5226 // bits boundary. We may also accept pretty large negative constants knowing
5227 // that all objects are in the positive half of address space.
5228 if (M == CodeModel::Small && Offset < 16*1024*1024)
5229 return true;
5230
5231 // For kernel code model we know that all object resist in the negative half
5232 // of 32bits address space. We may not accept negative offsets, since they may
5233 // be just off and we may accept pretty large positive ones.
5234 if (M == CodeModel::Kernel && Offset >= 0)
5235 return true;
5236
5237 return false;
5238}
5239
5240/// Determines whether the callee is required to pop its own arguments.
5241/// Callee pop is necessary to support tail calls.
5242bool X86::isCalleePop(CallingConv::ID CallingConv,
5243 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
5244 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
5245 // can guarantee TCO.
5246 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
5247 return true;
5248
5249 switch (CallingConv) {
5250 default:
5251 return false;
5252 case CallingConv::X86_StdCall:
5253 case CallingConv::X86_FastCall:
5254 case CallingConv::X86_ThisCall:
5255 case CallingConv::X86_VectorCall:
5256 return !is64Bit;
5257 }
5258}
5259
5260/// Return true if the condition is an signed comparison operation.
5261static bool isX86CCSigned(unsigned X86CC) {
5262 switch (X86CC) {
5263 default:
5264 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5264)
;
5265 case X86::COND_E:
5266 case X86::COND_NE:
5267 case X86::COND_B:
5268 case X86::COND_A:
5269 case X86::COND_BE:
5270 case X86::COND_AE:
5271 return false;
5272 case X86::COND_G:
5273 case X86::COND_GE:
5274 case X86::COND_L:
5275 case X86::COND_LE:
5276 return true;
5277 }
5278}
5279
5280static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
5281 switch (SetCCOpcode) {
5282 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5282)
;
5283 case ISD::SETEQ: return X86::COND_E;
5284 case ISD::SETGT: return X86::COND_G;
5285 case ISD::SETGE: return X86::COND_GE;
5286 case ISD::SETLT: return X86::COND_L;
5287 case ISD::SETLE: return X86::COND_LE;
5288 case ISD::SETNE: return X86::COND_NE;
5289 case ISD::SETULT: return X86::COND_B;
5290 case ISD::SETUGT: return X86::COND_A;
5291 case ISD::SETULE: return X86::COND_BE;
5292 case ISD::SETUGE: return X86::COND_AE;
5293 }
5294}
5295
5296/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
5297/// condition code, returning the condition code and the LHS/RHS of the
5298/// comparison to make.
5299static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
5300 bool isFP, SDValue &LHS, SDValue &RHS,
5301 SelectionDAG &DAG) {
5302 if (!isFP) {
5303 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5304 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
5305 // X > -1 -> X == 0, jump !sign.
5306 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5307 return X86::COND_NS;
5308 }
5309 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
5310 // X < 0 -> X == 0, jump on sign.
5311 return X86::COND_S;
5312 }
5313 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
5314 // X >= 0 -> X == 0, jump on !sign.
5315 return X86::COND_NS;
5316 }
5317 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5318 // X < 1 -> X <= 0
5319 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5320 return X86::COND_LE;
5321 }
5322 }
5323
5324 return TranslateIntegerX86CC(SetCCOpcode);
5325 }
5326
5327 // First determine if it is required or is profitable to flip the operands.
5328
5329 // If LHS is a foldable load, but RHS is not, flip the condition.
5330 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5331 !ISD::isNON_EXTLoad(RHS.getNode())) {
5332 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5333 std::swap(LHS, RHS);
5334 }
5335
5336 switch (SetCCOpcode) {
5337 default: break;
5338 case ISD::SETOLT:
5339 case ISD::SETOLE:
5340 case ISD::SETUGT:
5341 case ISD::SETUGE:
5342 std::swap(LHS, RHS);
5343 break;
5344 }
5345
5346 // On a floating point condition, the flags are set as follows:
5347 // ZF PF CF op
5348 // 0 | 0 | 0 | X > Y
5349 // 0 | 0 | 1 | X < Y
5350 // 1 | 0 | 0 | X == Y
5351 // 1 | 1 | 1 | unordered
5352 switch (SetCCOpcode) {
5353 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5353)
;
5354 case ISD::SETUEQ:
5355 case ISD::SETEQ: return X86::COND_E;
5356 case ISD::SETOLT: // flipped
5357 case ISD::SETOGT:
5358 case ISD::SETGT: return X86::COND_A;
5359 case ISD::SETOLE: // flipped
5360 case ISD::SETOGE:
5361 case ISD::SETGE: return X86::COND_AE;
5362 case ISD::SETUGT: // flipped
5363 case ISD::SETULT:
5364 case ISD::SETLT: return X86::COND_B;
5365 case ISD::SETUGE: // flipped
5366 case ISD::SETULE:
5367 case ISD::SETLE: return X86::COND_BE;
5368 case ISD::SETONE:
5369 case ISD::SETNE: return X86::COND_NE;
5370 case ISD::SETUO: return X86::COND_P;
5371 case ISD::SETO: return X86::COND_NP;
5372 case ISD::SETOEQ:
5373 case ISD::SETUNE: return X86::COND_INVALID;
5374 }
5375}
5376
5377/// Is there a floating point cmov for the specific X86 condition code?
5378/// Current x86 isa includes the following FP cmov instructions:
5379/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5380static bool hasFPCMov(unsigned X86CC) {
5381 switch (X86CC) {
5382 default:
5383 return false;
5384 case X86::COND_B:
5385 case X86::COND_BE:
5386 case X86::COND_E:
5387 case X86::COND_P:
5388 case X86::COND_A:
5389 case X86::COND_AE:
5390 case X86::COND_NE:
5391 case X86::COND_NP:
5392 return true;
5393 }
5394}
5395
5396static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
5397 return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
5398 VT.is512BitVector();
5399}
5400
5401bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5402 const CallInst &I,
5403 MachineFunction &MF,
5404 unsigned Intrinsic) const {
5405 Info.flags = MachineMemOperand::MONone;
5406 Info.offset = 0;
5407
5408 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5409 if (!IntrData) {
5410 switch (Intrinsic) {
5411 case Intrinsic::x86_aesenc128kl:
5412 case Intrinsic::x86_aesdec128kl:
5413 Info.opc = ISD::INTRINSIC_W_CHAIN;
5414 Info.ptrVal = I.getArgOperand(1);
5415 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5416 Info.align = Align(1);
5417 Info.flags |= MachineMemOperand::MOLoad;
5418 return true;
5419 case Intrinsic::x86_aesenc256kl:
5420 case Intrinsic::x86_aesdec256kl:
5421 Info.opc = ISD::INTRINSIC_W_CHAIN;
5422 Info.ptrVal = I.getArgOperand(1);
5423 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5424 Info.align = Align(1);
5425 Info.flags |= MachineMemOperand::MOLoad;
5426 return true;
5427 case Intrinsic::x86_aesencwide128kl:
5428 case Intrinsic::x86_aesdecwide128kl:
5429 Info.opc = ISD::INTRINSIC_W_CHAIN;
5430 Info.ptrVal = I.getArgOperand(0);
5431 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5432 Info.align = Align(1);
5433 Info.flags |= MachineMemOperand::MOLoad;
5434 return true;
5435 case Intrinsic::x86_aesencwide256kl:
5436 case Intrinsic::x86_aesdecwide256kl:
5437 Info.opc = ISD::INTRINSIC_W_CHAIN;
5438 Info.ptrVal = I.getArgOperand(0);
5439 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5440 Info.align = Align(1);
5441 Info.flags |= MachineMemOperand::MOLoad;
5442 return true;
5443 case Intrinsic::x86_atomic_bts:
5444 case Intrinsic::x86_atomic_btc:
5445 case Intrinsic::x86_atomic_btr: {
5446 Info.opc = ISD::INTRINSIC_W_CHAIN;
5447 Info.ptrVal = I.getArgOperand(0);
5448 unsigned Size = I.getType()->getScalarSizeInBits();
5449 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
5450 Info.align = Align(Size);
5451 Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5452 MachineMemOperand::MOVolatile;
5453 return true;
5454 }
5455 }
5456 return false;
5457 }
5458
5459 switch (IntrData->Type) {
5460 case TRUNCATE_TO_MEM_VI8:
5461 case TRUNCATE_TO_MEM_VI16:
5462 case TRUNCATE_TO_MEM_VI32: {
5463 Info.opc = ISD::INTRINSIC_VOID;
5464 Info.ptrVal = I.getArgOperand(0);
5465 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5466 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5467 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5468 ScalarVT = MVT::i8;
5469 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5470 ScalarVT = MVT::i16;
5471 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5472 ScalarVT = MVT::i32;
5473
5474 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5475 Info.align = Align(1);
5476 Info.flags |= MachineMemOperand::MOStore;
5477 break;
5478 }
5479 case GATHER:
5480 case GATHER_AVX2: {
5481 Info.opc = ISD::INTRINSIC_W_CHAIN;
5482 Info.ptrVal = nullptr;
5483 MVT DataVT = MVT::getVT(I.getType());
5484 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5485 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5486 IndexVT.getVectorNumElements());
5487 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5488 Info.align = Align(1);
5489 Info.flags |= MachineMemOperand::MOLoad;
5490 break;
5491 }
5492 case SCATTER: {
5493 Info.opc = ISD::INTRINSIC_VOID;
5494 Info.ptrVal = nullptr;
5495 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5496 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5497 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5498 IndexVT.getVectorNumElements());
5499 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5500 Info.align = Align(1);
5501 Info.flags |= MachineMemOperand::MOStore;
5502 break;
5503 }
5504 default:
5505 return false;
5506 }
5507
5508 return true;
5509}
5510
5511/// Returns true if the target can instruction select the
5512/// specified FP immediate natively. If false, the legalizer will
5513/// materialize the FP immediate as a load from a constant pool.
/// VT and ForCodeSize are not consulted by this implementation.
5514bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5515 bool ForCodeSize) const {
  // Legal only when Imm is bit-for-bit equal to an entry of LegalFPImmediates.
5516 for (const APFloat &FPImm : LegalFPImmediates)
5517 if (Imm.bitwiseIsEqual(FPImm))
5518 return true;
5519 return false;
5520}
5521
/// Decide whether narrowing \p Load is worthwhile.
/// Returns false when the base pointer is a WrapperRIP around a global address
/// flagged MO_GOTTPOFF, and when a 256/512-bit vector load without exactly one
/// use has every value use being a single-use EXTRACT_SUBVECTOR feeding a
/// STORE. Returns true otherwise. \p ExtTy and \p NewVT are not consulted here.
5522bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5523 ISD::LoadExtType ExtTy,
5524 EVT NewVT) const {
5525 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")(static_cast <bool> (cast<LoadSDNode>(Load)->isSimple
() && "illegal to narrow") ? void (0) : __assert_fail
("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5525, __extension__
__PRETTY_FUNCTION__))
;
5526
5527 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5528 // relocation target a movq or addq instruction: don't let the load shrink.
5529 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5530 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5531 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5532 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5533
5534 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5535 // those uses are extracted directly into a store, then the extract + store
5536 // can be store-folded. Therefore, it's probably not worth splitting the load.
5537 EVT VT = Load->getValueType(0);
5538 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5539 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5540 // Skip uses of the chain value. Result 0 of the node is the load value.
5541 if (UI.getUse().getResNo() != 0)
5542 continue;
5543
5544 // If this use is not an extract + store, it's probably worth splitting.
5545 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5546 UI->use_begin()->getOpcode() != ISD::STORE)
5547 return true;
5548 }
5549 // All non-chain uses are extract + store.
5550 return false;
5551 }
5552
  // Default: narrowing is considered worthwhile.
5553 return true;
5554}
5555
5556/// Returns true if it is beneficial to convert a load of a constant
5557/// to just the constant itself.
/// \p Ty must be an integer type (asserted). The answer depends only on its
/// bit width: true for widths of 1 to 64 bits. \p Imm is not inspected.
5558bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5559 Type *Ty) const {
5560 assert(Ty->isIntegerTy())(static_cast <bool> (Ty->isIntegerTy()) ? void (0) :
__assert_fail ("Ty->isIntegerTy()", "llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5560, __extension__ __PRETTY_FUNCTION__))
;
5561
5562 unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // Reject a zero size and anything wider than 64 bits.
5563 if (BitSize == 0 || BitSize > 64)
5564 return false;
5565 return true;
5566}
5567
/// Returns false only when \p CmpOpVT is a floating-point type other than f128
/// and the subtarget is 64-bit LP64 with AVX; returns true in every other case.
5568bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5569 // If we are using XMM registers in the ABI and the condition of the select is
5570 // a floating-point compare and we have blendv or conditional move, then it is
5571 // cheaper to select instead of doing a cross-register move and creating a
5572 // load that depends on the compare result.
  // f128 compares are deliberately excluded from the FP-compare case.
5573 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5574 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5575}
5576
/// Returns false for vector types when the subtarget has AVX512, and true for
/// every other type/subtarget combination.
5577bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5578 // TODO: It might be a win to ease or lift this restriction, but the generic
5579 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5580 if (VT.isVector() && Subtarget.hasAVX512())
5581 return false;
5582
5583 return true;
5584}
5585
/// Decide whether a multiply of type \p VT by the constant \p C should be
/// decomposed into shift plus add/sub.
/// Returns false when \p C is not a constant splat vector, or when the
/// legalized type has a legal MUL with elements of at most 32 bits (and, for
/// 32-bit elements, PMULLD is not slow). Otherwise returns true only if the
/// splat constant has one of the forms tested in the final return.
5586bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5587 SDValue C) const {
5588 // TODO: We handle scalars using custom code, but generic combining could make
5589 // that unnecessary.
5590 APInt MulC;
5591 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5592 return false;
5593
5594 // Find the type this will be legalized to. Otherwise we might prematurely
5595 // convert this to shl+add/sub and then still have to type legalize those ops.
5596 // Another choice would be to defer the decision for illegal types until
5597 // after type legalization. But constant splat vectors of i64 can't make it
5598 // through type legalization on 32-bit targets so we would need to special
5599 // case vXi64.
5600 while (getTypeAction(Context, VT) != TypeLegal)
5601 VT = getTypeToTransformTo(Context, VT);
5602
5603 // If vector multiply is legal, assume that's faster than shl + add/sub.
5604 // Multiply is a complex op with higher latency and lower throughput in
5605 // most implementations, sub-vXi32 vector multiplies are always fast,
5606 // vXi32 mustn't have a SlowMULLD implementation, and anything larger (vXi64)
5607 // is always going to be slow.
5608 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5609 if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&
5610 (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
5611 return false;
5612
5613 // shl+add, shl+sub, shl+add+neg
  // i.e. MulC+1, MulC-1, 1-MulC or -(MulC+1) is a power of two.
5614 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5615 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5616}
5617
/// Returns true if extracting a \p ResVT subvector from \p SrcVT at element
/// \p Index is considered cheap.
/// Always false when EXTRACT_SUBVECTOR is neither legal nor custom for
/// \p ResVT. For i1-element results, see the mask-vector rule below; for all
/// other results, \p Index must be a multiple of the result element count.
5618bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5619 unsigned Index) const {
5620 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5621 return false;
5622
5623 // Mask vectors support all subregister combinations and operations that
5624 // extract half of vector.
  // NOTE(review): the size test below requires ResVT to be twice as wide as
  // SrcVT, which reads as inverted relative to "extract half of vector" --
  // confirm whether SrcVT == 2 * ResVT was intended.
5625 if (ResVT.getVectorElementType() == MVT::i1)
5626 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5627 (Index == ResVT.getVectorNumElements()));
5628
5629 return (Index % ResVT.getVectorNumElements()) == 0;
5630}
5631
/// Decide whether the vector operation \p VecOp should be converted to a
/// scalar operation.
/// Returns false for opcodes at or above ISD::BUILTIN_OP_END, true when the
/// opcode is not legal/custom/promote for the vector type, and otherwise
/// whether the opcode is legal/custom/promote for the scalar element type.
5632bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5633 unsigned Opc = VecOp.getOpcode();
5634
5635 // Assume target opcodes can't be scalarized.
5636 // TODO - do we have any exceptions?
5637 if (Opc >= ISD::BUILTIN_OP_END)
5638 return false;
5639
5640 // If the vector op is not supported, try to convert to scalar.
5641 EVT VecVT = VecOp.getValueType();
5642 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5643 return true;
5644
5645 // If the vector op is supported, but the scalar op is not, the transform may
5646 // not be worthwhile.
5647 EVT ScalarVT = VecVT.getScalarType();
5648 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5649}
5650
/// Returns true if an overflow op with \p Opcode should be formed for \p VT.
/// Never for vector types; for other types, when \p VT is a simple type or
/// \p Opcode is not marked Expand for it. The unnamed bool parameter is
/// ignored.
5651bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5652 bool) const {
5653 // TODO: Allow vectors?
5654 if (VT.isVector())
5655 return false;
5656 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5657}
5658
/// Returns true if cttz is cheap to speculate, which here is exactly when the
/// subtarget reports BMI.
5659bool X86TargetLowering::isCheapToSpeculateCttz() const {
5660 // Speculate cttz only if we can directly use TZCNT.
5661 return Subtarget.hasBMI();
5662}
5663
/// Returns true if ctlz is cheap to speculate, which here is exactly when the
/// subtarget reports LZCNT.
5664bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5665 // Speculate ctlz only if we can directly use LZCNT.
5666 return Subtarget.hasLZCNT();
5667}
5668
/// Returns true for f32, f64, any vector type, and for f16 when the subtarget
/// has FP16; false for every other type.
5669bool X86TargetLowering::hasBitPreservingFPLogic(EVT VT) const {
5670 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() ||
5671 (VT == MVT::f16 && Subtarget.hasFP16());
5672}
5673
/// Returns true if an FP constant of type \p VT should be shrunk: always when
/// SSE2 is unavailable, and for f80 regardless of SSE2.
5674bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
5675 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
5676 // expensive than a straight movsd. On the other hand, it's important to
5677 // shrink long double fp constant since fldt is very slow.
5678 return !Subtarget.hasSSE2() || VT == MVT::f80;
5679}
5680
/// Returns true when \p VT is f64 with SSE2, f32 with SSE1, or f16 with FP16
/// available on the subtarget; false otherwise.
5681bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const {
5682 return (VT == MVT::f64 && Subtarget.hasSSE2()) ||
5683 (VT == MVT::f32 && Subtarget.hasSSE1()) ||
5684 (VT == MVT::f16 && Subtarget.hasFP16());
5685}
5686
/// Returns true if turning a load of \p LoadVT followed by a bitcast to
/// \p BitcastVT into a load of \p BitcastVT is considered beneficial.
/// Applies the X86-specific rejections and the legal-vector shortcut below,
/// then defers to the TargetLowering base implementation.
5687bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5688 const SelectionDAG &DAG,
5689 const MachineMemOperand &MMO) const {
  // Without AVX512, reject a non-vector load bitcast to a vector of i1.
5690 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5691 BitcastVT.getVectorElementType() == MVT::i1)
5692 return false;
5693
  // Without DQI, reject the specific i8 -> v8i1 case.
5694 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5695 return false;
5696
5697 // If both types are legal vectors, it's always ok to convert them.
5698 if (LoadVT.isVector() && BitcastVT.isVector() &&
5699 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5700 return true;
5701
5702 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5703}
5704
/// Returns true if stores may be merged into a single store of type \p MemVT.
/// With the NoImplicitFloat function attribute, the merged size is capped at
/// 64 bits on 64-bit subtargets and 32 bits otherwise. Without it, the size is
/// capped at the subtarget's preferred vector width. \p AddressSpace is not
/// consulted here.
5705bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5706 const MachineFunction &MF) const {
5707 // Do not merge to float value size (128 bytes) if no implicit
5708 // float attribute is set.
  // NOTE(review): "128 bytes" above may have been meant as 128 bits; the
  // limits below are all expressed in bits -- confirm.
5709 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
5710
5711 if (NoFloat) {
5712 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5713 return (MemVT.getSizeInBits() <= MaxIntSize);
5714 }
5715 // Make sure we don't merge greater than our preferred vector
5716 // width.
5717 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5718 return false;
5719
5720 return true;
5721}
5722
/// Returns whether the subtarget reports a fast LZCNT.
5723bool X86TargetLowering::isCtlzFast() const {
5724 return Subtarget.hasFastLZCNT();
5725}
5726
/// Always returns true; \p AndI is not inspected.
5727bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5728 const Instruction &AndI) const {
5729 return true;
5730}
5731
/// Returns true if an and-not compare is available for \p Y: requires a
/// non-vector i32 or i64 value, BMI on the subtarget, and \p Y must not be a
/// ConstantSDNode.
5732bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5733 EVT VT = Y.getValueType();
5734
5735 if (VT.isVector())
5736 return false;
5737
5738 if (!Subtarget.hasBMI())
5739 return false;
5740
5741 // There are only 32-bit and 64-bit forms for 'andn'.
5742 if (VT != MVT::i32 && VT != MVT::i64)
5743 return false;
5744
  // Constant operands are rejected.
5745 return !isa<ConstantSDNode>(Y);
5746}
5747
/// Returns true if an and-not operation is available for \p Y.
/// Non-vector values defer to hasAndNotCompare(). Vector values need SSE1 and
/// at least 128 bits; v4i32 is then accepted outright, and any other vector
/// type additionally needs SSE2.
5748bool X86TargetLowering::hasAndNot(SDValue Y) const {
5749 EVT VT = Y.getValueType();
5750
5751 if (!VT.isVector())
5752 return hasAndNotCompare(Y);
5753
5754 // Vector.
5755
5756 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5757 return false;
5758
5759 if (VT == MVT::v4i32)
5760 return true;
5761
5762 return Subtarget.hasSSE2();
5763}
5764
/// Returns true when \p X has a scalar integer type; \p Y is not inspected.
5765bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5766 return X.getValueType().isScalarInteger(); // 'bt'
5767}
5768
/// X86 refinement of the TargetLowering hook of the same name.
/// Returns false whenever the base implementation does. Otherwise returns true
/// for scalar integer \p X, for a splat \p Y (undefs allowed), or with AVX2;
/// failing all of those, only when \p NewShiftOpcode is ISD::SHL.
5769bool X86TargetLowering::
5770 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5771 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5772 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5773 SelectionDAG &DAG) const {
5774 // Does baseline recommend not to perform the fold by default?
5775 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5776 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5777 return false;
5778 // For scalars this transform is always beneficial.
5779 if (X.getValueType().isScalarInteger())
5780 return true;
5781 // If all the shift amounts are identical, then transform is beneficial even
5782 // with rudimentary SSE2 shifts.
5783 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5784 return true;
5785 // If we have AVX2 with its powerful shift operations, then it's also good.
5786 if (Subtarget.hasAVX2())
5787 return true;
5788 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5789 return NewShiftOpcode == ISD::SHL;
5790}
5791
/// Decide whether a constant shift pair rooted at \p N should be folded to a
/// mask. \p N must be SHL-of-SRL or SRL-of-SHL (asserted).
/// When the subtarget reports fast shift masks for the kind of type involved
/// (vector or scalar), the fold is allowed only if both shifts use the same
/// amount operand; otherwise the decision is left to TargetLoweringBase.
5792bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5793 const SDNode *N, CombineLevel Level) const {
5794 assert(((N->getOpcode() == ISD::SHL &&(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5798, __extension__
__PRETTY_FUNCTION__))
5795 N->getOperand(0).getOpcode() == ISD::SRL) ||(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5798, __extension__
__PRETTY_FUNCTION__))
5796 (N->getOpcode() == ISD::SRL &&(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5798, __extension__
__PRETTY_FUNCTION__))
5797 N->getOperand(0).getOpcode() == ISD::SHL)) &&(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5798, __extension__
__PRETTY_FUNCTION__))
5798 "Expected shift-shift mask")(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "llvm/lib/Target/X86/X86ISelLowering.cpp", 5798, __extension__
__PRETTY_FUNCTION__))
;
5799 EVT VT = N->getValueType(0);
5800 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5801 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5802 // Only fold if the shift values are equal - so it folds to AND.
5803 // TODO - we should fold if either is a non-uniform vector but we don't do
5804 // the fold for non-splats yet.
5805 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5806 }
5807 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5808}
5809
/// Returns true if a mask on \p Y should be folded to a variable shift pair.
/// False for vector types and for i64 on subtargets that are not 64-bit; true
/// otherwise.
5810bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5811 EVT VT = Y.getValueType();
5812
5813 // For vectors, we don't have a preference, but we probably want a mask.
5814 if (VT.isVector())
5815 return false;
5816
5817 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5818 if (VT == MVT::i64 && !Subtarget.is64Bit())
5819 return false;
5820
5821 return true;
5822}
5823
/// Returns false when the current function has the minsize attribute and the
/// subtarget OS is not Windows; returns true otherwise. \p N is not inspected.
5824bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5825 SDNode *N) const {
5826 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5827 !Subtarget.isOSWindows())
5828 return false;
5829 return true;
5830}
5831
/// Returns true exactly when \p VT is a legal type.
5832bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5833 // Any legal vector type can be splatted more efficiently than
5834 // loading/spilling from memory.
5835 return isTypeLegal(VT);
5836}
5837
/// Returns the type to use for a fast equality compare of \p NumBits bits:
/// the integer type of that width if legal, else v16i8 for 128 bits or v32i8
/// for 256 bits when those vector types are legal. Returns
/// MVT::INVALID_SIMPLE_VALUE_TYPE when none of these apply.
5838MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5839 MVT VT = MVT::getIntegerVT(NumBits);
5840 if (isTypeLegal(VT))
5841 return VT;
5842
5843 // PMOVMSKB can handle this.
5844 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5845 return MVT::v16i8;
5846
5847 // VPMOVMSKB can handle this.
5848 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5849 return MVT::v32i8;
5850
5851 // TODO: Allow 64-bit type for 32-bit target.
5852 // TODO: 512-bit types should be allowed, but make sure that those
5853 // cases are handled in combineVectorSizedSetCCEquality().
5854
5855 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5856}
5857
5858/// Return true if Val is the undef sentinel value (SM_SentinelUndef) or is
/// equal to CmpVal.
5859static bool isUndefOrEqual(int Val, int CmpVal) {
5860 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5861}
5862
5863/// Return true if every element in Mask is the undef sentinel value or equal to
5864/// the specified value CmpVal.
5865static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
5866 return llvm::all_of(Mask, [CmpVal](int M) {
5867 return (M == SM_SentinelUndef) || (M == CmpVal);
5868 });
5869}
5870
5871/// Return true if Val is either the undef sentinel (SM_SentinelUndef) or the
/// zero sentinel (SM_SentinelZero).
5872static bool isUndefOrZero(int Val) {
5873 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5874}
5875
5876/// Return true if every element of the slice of Mask taken at position Pos
5877/// with length Size is the undef sentinel value.
5878static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5879 return llvm::all_of(Mask.slice(Pos, Size),
5880 [](int M) { return M == SM_SentinelUndef; });
5881}
5882
5883/// Return true if the mask creates a vector whose lower half is undefined.
/// The lower half is the first Mask.size() / 2 elements (integer division).
5884static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5885 unsigned NumElts = Mask.size();
5886 return isUndefInRange(Mask, 0, NumElts / 2);
5887}
5888
5889/// Return true if the mask creates a vector whose upper half is undefined.
/// The upper half is the Mask.size() / 2 elements starting at index
/// Mask.size() / 2 (integer division).
/// NOTE(review): for an odd-sized mask this range would stop one element short
/// of the end -- presumably masks always have an even size; confirm.
5890static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5891 unsigned NumElts = Mask.size();
5892 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5893}
5894
5895/// Return true if Val falls within the half-open range [Low, Hi), i.e.
/// Low is included and Hi is excluded.
5896static bool isInRange(int Val, int Low, int Hi) {
5897 return (Val >= Low && Val < Hi);
5898}
5899
5900/// Return true if the value of any element in Mask falls within the half-open
5901/// range [Low, Hi). Returns false for an empty Mask.
5902static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5903 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5904}
5905
5906/// Return true if the value of any element in Mask is the zero sentinel value
/// (SM_SentinelZero).
5907static bool isAnyZero(ArrayRef<int> Mask) {
5908 return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
5909}
5910
5911/// Return true if the value of any element in Mask is the zero or undef
5912/// sentinel value (SM_SentinelZero or SM_SentinelUndef).
5913static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
5914 return llvm::any_of(Mask, [](int M) {
5915 return M == SM_SentinelZero || M == SM_SentinelUndef;
5916 });
5917}