Bug Summary

File: llvm/include/llvm/ADT/APInt.h
Warning: line 403, column 36
The result of the right shift is undefined due to shifting by '64', which is greater or equal to the width of type 'llvm::APInt::WordType'
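In C++, a shift is undefined when the shift amount is greater than or equal to the bit width of the (promoted) left operand; since llvm::APInt::WordType is a 64-bit unsigned word, any shift amount of 64 or more triggers this warning. A minimal standalone C++ sketch of how such a shift is typically guarded (hypothetical helper for illustration only, not the actual APInt implementation):

#include <cstdint>

// Hypothetical helper, for illustration only: shifting a 64-bit word right
// by an amount >= 64 is undefined behavior, so the full-width case is
// handled explicitly instead of being passed to the shift operator.
static uint64_t logicalShiftRight(uint64_t Word, unsigned ShiftAmt) {
  return ShiftAmt >= 64 ? 0 : (Word >> ShiftAmt);
}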

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/lib/Target/X86 -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-06-21-164211-33944-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp

/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/IntrinsicLowering.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineJumpTableInfo.h"
39#include "llvm/CodeGen/MachineLoopInfo.h"
40#include "llvm/CodeGen/MachineModuleInfo.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/TargetLowering.h"
43#include "llvm/CodeGen/WinEHFuncInfo.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DerivedTypes.h"
47#include "llvm/IR/DiagnosticInfo.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalVariable.h"
51#include "llvm/IR/Instructions.h"
52#include "llvm/IR/Intrinsics.h"
53#include "llvm/IR/IRBuilder.h"
54#include "llvm/MC/MCAsmInfo.h"
55#include "llvm/MC/MCContext.h"
56#include "llvm/MC/MCExpr.h"
57#include "llvm/MC/MCSymbol.h"
58#include "llvm/Support/CommandLine.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/KnownBits.h"
62#include "llvm/Support/MathExtras.h"
63#include "llvm/Target/TargetOptions.h"
64#include <algorithm>
65#include <bitset>
66#include <cctype>
67#include <numeric>
68using namespace llvm;
69
70#define DEBUG_TYPE "x86-isel"
71
72STATISTIC(NumTailCalls, "Number of tail calls");
73
74static cl::opt<int> ExperimentalPrefLoopAlignment(
75 "x86-experimental-pref-loop-alignment", cl::init(4),
76 cl::desc(
77 "Sets the preferable loop alignment for experiments (as log2 bytes)"
78 "(the last x86-experimental-pref-loop-alignment bits"
79 " of the loop header PC will be 0)."),
80 cl::Hidden);
81
82static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
83 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
84 cl::desc(
85 "Sets the preferable loop alignment for experiments (as log2 bytes) "
86 "for innermost loops only. If specified, this option overrides "
87 "alignment set by x86-experimental-pref-loop-alignment."),
88 cl::Hidden);
89
90static cl::opt<bool> MulConstantOptimization(
91 "mul-constant-optimization", cl::init(true),
92 cl::desc("Replace 'mul x, Const' with more effective instructions like "
93 "SHIFT, LEA, etc."),
94 cl::Hidden);
95
96static cl::opt<bool> ExperimentalUnorderedISEL(
97 "x86-experimental-unordered-atomic-isel", cl::init(false),
98 cl::desc("Use LoadSDNode and StoreSDNode instead of "
99 "AtomicSDNode for unordered atomic loads and "
100 "stores respectively."),
101 cl::Hidden);
102
103/// Call this when the user attempts to do something unsupported, like
104/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
105/// report_fatal_error, so calling code should attempt to recover without
106/// crashing.
107static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
108 const char *Msg) {
109 MachineFunction &MF = DAG.getMachineFunction();
110 DAG.getContext()->diagnose(
111 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
112}
113
114X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
115 const X86Subtarget &STI)
116 : TargetLowering(TM), Subtarget(STI) {
117 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
118 X86ScalarSSEf64 = Subtarget.hasSSE2();
119 X86ScalarSSEf32 = Subtarget.hasSSE1();
120 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
121
122 // Set up the TargetLowering object.
123
124 // X86 is weird. It always uses i8 for shift amounts and setcc results.
125 setBooleanContents(ZeroOrOneBooleanContent);
126 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
127 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
128
129 // For 64-bit, since we have so many registers, use the ILP scheduler.
130 // For 32-bit, use the register pressure specific scheduling.
131 // For Atom, always use ILP scheduling.
132 if (Subtarget.isAtom())
133 setSchedulingPreference(Sched::ILP);
134 else if (Subtarget.is64Bit())
135 setSchedulingPreference(Sched::ILP);
136 else
137 setSchedulingPreference(Sched::RegPressure);
138 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
139 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
140
141 // Bypass expensive divides and use cheaper ones.
142 if (TM.getOptLevel() >= CodeGenOpt::Default) {
143 if (Subtarget.hasSlowDivide32())
144 addBypassSlowDiv(32, 8);
145 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
146 addBypassSlowDiv(64, 32);
147 }
148
149 // Setup Windows compiler runtime calls.
150 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
151 static const struct {
152 const RTLIB::Libcall Op;
153 const char * const Name;
154 const CallingConv::ID CC;
155 } LibraryCalls[] = {
156 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
157 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
158 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
159 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
160 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
161 };
162
163 for (const auto &LC : LibraryCalls) {
164 setLibcallName(LC.Op, LC.Name);
165 setLibcallCallingConv(LC.Op, LC.CC);
166 }
167 }
168
169 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
170 // MSVCRT doesn't have powi; fall back to pow
171 setLibcallName(RTLIB::POWI_F32, nullptr);
172 setLibcallName(RTLIB::POWI_F64, nullptr);
173 }
174
 175 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
176 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
177 // FIXME: Should we be limiting the atomic size on other configs? Default is
178 // 1024.
179 if (!Subtarget.hasCmpxchg8b())
180 setMaxAtomicSizeInBitsSupported(32);
181
182 // Set up the register classes.
183 addRegisterClass(MVT::i8, &X86::GR8RegClass);
184 addRegisterClass(MVT::i16, &X86::GR16RegClass);
185 addRegisterClass(MVT::i32, &X86::GR32RegClass);
186 if (Subtarget.is64Bit())
187 addRegisterClass(MVT::i64, &X86::GR64RegClass);
188
189 for (MVT VT : MVT::integer_valuetypes())
190 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
191
192 // We don't accept any truncstore of integer registers.
193 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
194 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
195 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
196 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
197 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
198 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
199
200 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
201
202 // SETOEQ and SETUNE require checking two conditions.
203 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
204 setCondCodeAction(ISD::SETOEQ, VT, Expand);
205 setCondCodeAction(ISD::SETUNE, VT, Expand);
206 }
207
208 // Integer absolute.
209 if (Subtarget.hasCMov()) {
210 setOperationAction(ISD::ABS , MVT::i16 , Custom);
211 setOperationAction(ISD::ABS , MVT::i32 , Custom);
212 if (Subtarget.is64Bit())
213 setOperationAction(ISD::ABS , MVT::i64 , Custom);
214 }
215
216 // Funnel shifts.
217 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
218 // For slow shld targets we only lower for code size.
219 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
220
221 setOperationAction(ShiftOp , MVT::i8 , Custom);
222 setOperationAction(ShiftOp , MVT::i16 , Custom);
223 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
224 if (Subtarget.is64Bit())
225 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
226 }
227
228 if (!Subtarget.useSoftFloat()) {
229 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
230 // operation.
231 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
232 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
233 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
234 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
235 // We have an algorithm for SSE2, and we turn this into a 64-bit
236 // FILD or VCVTUSI2SS/SD for other targets.
237 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
238 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
239 // We have an algorithm for SSE2->double, and we turn this into a
240 // 64-bit FILD followed by conditional FADD for other targets.
241 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
242 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
243
244 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
245 // this operation.
246 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
247 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
248 // SSE has no i16 to fp conversion, only i32. We promote in the handler
249 // to allow f80 to use i16 and f64 to use i16 with sse1 only
250 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
251 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
252 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
253 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
254 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
255 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
256 // are Legal, f80 is custom lowered.
257 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
258 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
259
260 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
261 // this operation.
262 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
263 // FIXME: This doesn't generate invalid exception when it should. PR44019.
264 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
265 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
266 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
267 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
268 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
269 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
270 // are Legal, f80 is custom lowered.
271 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
272 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
273
274 // Handle FP_TO_UINT by promoting the destination to a larger signed
275 // conversion.
276 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
277 // FIXME: This doesn't generate invalid exception when it should. PR44019.
278 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
279 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
280 // FIXME: This doesn't generate invalid exception when it should. PR44019.
281 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
282 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
283 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
284 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
285 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
286
287 setOperationAction(ISD::LRINT, MVT::f32, Custom);
288 setOperationAction(ISD::LRINT, MVT::f64, Custom);
289 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
290 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
291
292 if (!Subtarget.is64Bit()) {
293 setOperationAction(ISD::LRINT, MVT::i64, Custom);
294 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
295 }
296 }
297
298 if (Subtarget.hasSSE2()) {
299 // Custom lowering for saturating float to int conversions.
300 // We handle promotion to larger result types manually.
301 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
302 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
303 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
304 }
305 if (Subtarget.is64Bit()) {
306 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
307 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
308 }
309 }
310
311 // Handle address space casts between mixed sized pointers.
312 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
313 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
314
315 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
316 if (!X86ScalarSSEf64) {
317 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
318 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
319 if (Subtarget.is64Bit()) {
320 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
321 // Without SSE, i64->f64 goes through memory.
322 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
323 }
324 } else if (!Subtarget.is64Bit())
325 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
326
327 // Scalar integer divide and remainder are lowered to use operations that
328 // produce two results, to match the available instructions. This exposes
329 // the two-result form to trivial CSE, which is able to combine x/y and x%y
330 // into a single instruction.
331 //
332 // Scalar integer multiply-high is also lowered to use two-result
333 // operations, to match the available instructions. However, plain multiply
334 // (low) operations are left as Legal, as there are single-result
335 // instructions for this in x86. Using the two-result multiply instructions
336 // when both high and low results are needed must be arranged by dagcombine.
337 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
338 setOperationAction(ISD::MULHS, VT, Expand);
339 setOperationAction(ISD::MULHU, VT, Expand);
340 setOperationAction(ISD::SDIV, VT, Expand);
341 setOperationAction(ISD::UDIV, VT, Expand);
342 setOperationAction(ISD::SREM, VT, Expand);
343 setOperationAction(ISD::UREM, VT, Expand);
344 }
345
346 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
347 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
348 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
349 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
350 setOperationAction(ISD::BR_CC, VT, Expand);
351 setOperationAction(ISD::SELECT_CC, VT, Expand);
352 }
353 if (Subtarget.is64Bit())
354 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
355 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
356 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
357 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
358
359 setOperationAction(ISD::FREM , MVT::f32 , Expand);
360 setOperationAction(ISD::FREM , MVT::f64 , Expand);
361 setOperationAction(ISD::FREM , MVT::f80 , Expand);
362 setOperationAction(ISD::FREM , MVT::f128 , Expand);
363
364 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
365 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
366 setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
367 }
368
369 // Promote the i8 variants and force them on up to i32 which has a shorter
370 // encoding.
371 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
372 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
373
374 if (Subtarget.hasBMI()) {
375 // Promote the i16 zero undef variant and force it on up to i32 when tzcnt
376 // is enabled.
377 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32);
378 } else {
379 setOperationAction(ISD::CTTZ, MVT::i16, Custom);
380 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
381 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
382 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
383 if (Subtarget.is64Bit()) {
384 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
385 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
386 }
387 }
388
389 if (Subtarget.hasLZCNT()) {
390 // When promoting the i8 variants, force them to i32 for a shorter
391 // encoding.
392 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
393 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
394 } else {
395 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
396 if (VT == MVT::i64 && !Subtarget.is64Bit())
397 continue;
398 setOperationAction(ISD::CTLZ , VT, Custom);
399 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
400 }
401 }
402
403 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
404 ISD::STRICT_FP_TO_FP16}) {
405 // Special handling for half-precision floating point conversions.
406 // If we don't have F16C support, then lower half float conversions
407 // into library calls.
408 setOperationAction(
409 Op, MVT::f32,
410 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
411 // There's never any support for operations beyond MVT::f32.
412 setOperationAction(Op, MVT::f64, Expand);
413 setOperationAction(Op, MVT::f80, Expand);
414 setOperationAction(Op, MVT::f128, Expand);
415 }
416
417 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
418 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
419 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
420 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
421 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
422 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
423 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
424 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
425
426 setOperationAction(ISD::PARITY, MVT::i8, Custom);
427 if (Subtarget.hasPOPCNT()) {
428 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
429 } else {
430 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
431 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
432 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
433 if (Subtarget.is64Bit())
434 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
435 else
436 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
437
438 setOperationAction(ISD::PARITY, MVT::i16, Custom);
439 setOperationAction(ISD::PARITY, MVT::i32, Custom);
440 if (Subtarget.is64Bit())
441 setOperationAction(ISD::PARITY, MVT::i64, Custom);
442 }
443
444 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
445
446 if (!Subtarget.hasMOVBE())
447 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
448
449 // X86 wants to expand cmov itself.
450 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
451 setOperationAction(ISD::SELECT, VT, Custom);
452 setOperationAction(ISD::SETCC, VT, Custom);
453 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
454 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
455 }
456 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
457 if (VT == MVT::i64 && !Subtarget.is64Bit())
458 continue;
459 setOperationAction(ISD::SELECT, VT, Custom);
460 setOperationAction(ISD::SETCC, VT, Custom);
461 }
462
463 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
464 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
465 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
466
467 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
468 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
469 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
470 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
471 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
472 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
473 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
474 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
475
476 // Darwin ABI issue.
477 for (auto VT : { MVT::i32, MVT::i64 }) {
478 if (VT == MVT::i64 && !Subtarget.is64Bit())
479 continue;
480 setOperationAction(ISD::ConstantPool , VT, Custom);
481 setOperationAction(ISD::JumpTable , VT, Custom);
482 setOperationAction(ISD::GlobalAddress , VT, Custom);
483 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
484 setOperationAction(ISD::ExternalSymbol , VT, Custom);
485 setOperationAction(ISD::BlockAddress , VT, Custom);
486 }
487
488 // 64-bit shl, sra, srl (iff 32-bit x86)
489 for (auto VT : { MVT::i32, MVT::i64 }) {
490 if (VT == MVT::i64 && !Subtarget.is64Bit())
491 continue;
492 setOperationAction(ISD::SHL_PARTS, VT, Custom);
493 setOperationAction(ISD::SRA_PARTS, VT, Custom);
494 setOperationAction(ISD::SRL_PARTS, VT, Custom);
495 }
496
497 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
498 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
499
500 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
501
502 // Expand certain atomics
503 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
504 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
505 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
506 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
507 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
508 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
509 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
510 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
511 }
512
513 if (!Subtarget.is64Bit())
514 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
515
516 if (Subtarget.hasCmpxchg16b()) {
517 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
518 }
519
520 // FIXME - use subtarget debug flags
521 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
522 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
523 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
524 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
525 }
526
527 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
528 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
529
530 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
531 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
532
533 setOperationAction(ISD::TRAP, MVT::Other, Legal);
534 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
535 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
536
537 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
538 setOperationAction(ISD::VASTART , MVT::Other, Custom);
539 setOperationAction(ISD::VAEND , MVT::Other, Expand);
540 bool Is64Bit = Subtarget.is64Bit();
541 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
542 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
543
544 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
545 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
546
547 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
548
549 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
550 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
551 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
552
553 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
554 // f32 and f64 use SSE.
555 // Set up the FP register classes.
556 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
557 : &X86::FR32RegClass);
558 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
559 : &X86::FR64RegClass);
560
561 // Disable f32->f64 extload as we can only generate this in one instruction
 562 // under optsize. So it's easier to pattern match (fpext (load)) for that
563 // case instead of needing to emit 2 instructions for extload in the
564 // non-optsize case.
565 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
566
567 for (auto VT : { MVT::f32, MVT::f64 }) {
568 // Use ANDPD to simulate FABS.
569 setOperationAction(ISD::FABS, VT, Custom);
570
571 // Use XORP to simulate FNEG.
572 setOperationAction(ISD::FNEG, VT, Custom);
573
574 // Use ANDPD and ORPD to simulate FCOPYSIGN.
575 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
576
577 // These might be better off as horizontal vector ops.
578 setOperationAction(ISD::FADD, VT, Custom);
579 setOperationAction(ISD::FSUB, VT, Custom);
580
581 // We don't support sin/cos/fmod
582 setOperationAction(ISD::FSIN , VT, Expand);
583 setOperationAction(ISD::FCOS , VT, Expand);
584 setOperationAction(ISD::FSINCOS, VT, Expand);
585 }
586
587 // Lower this to MOVMSK plus an AND.
588 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
589 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
590
591 } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
592 (UseX87 || Is64Bit)) {
593 // Use SSE for f32, x87 for f64.
594 // Set up the FP register classes.
595 addRegisterClass(MVT::f32, &X86::FR32RegClass);
596 if (UseX87)
597 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
598
599 // Use ANDPS to simulate FABS.
600 setOperationAction(ISD::FABS , MVT::f32, Custom);
601
602 // Use XORP to simulate FNEG.
603 setOperationAction(ISD::FNEG , MVT::f32, Custom);
604
605 if (UseX87)
606 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
607
608 // Use ANDPS and ORPS to simulate FCOPYSIGN.
609 if (UseX87)
610 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
611 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
612
613 // We don't support sin/cos/fmod
614 setOperationAction(ISD::FSIN , MVT::f32, Expand);
615 setOperationAction(ISD::FCOS , MVT::f32, Expand);
616 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
617
618 if (UseX87) {
619 // Always expand sin/cos functions even though x87 has an instruction.
620 setOperationAction(ISD::FSIN, MVT::f64, Expand);
621 setOperationAction(ISD::FCOS, MVT::f64, Expand);
622 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
623 }
624 } else if (UseX87) {
625 // f32 and f64 in x87.
626 // Set up the FP register classes.
627 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
628 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
629
630 for (auto VT : { MVT::f32, MVT::f64 }) {
631 setOperationAction(ISD::UNDEF, VT, Expand);
632 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
633
634 // Always expand sin/cos functions even though x87 has an instruction.
635 setOperationAction(ISD::FSIN , VT, Expand);
636 setOperationAction(ISD::FCOS , VT, Expand);
637 setOperationAction(ISD::FSINCOS, VT, Expand);
638 }
639 }
640
641 // Expand FP32 immediates into loads from the stack, save special cases.
642 if (isTypeLegal(MVT::f32)) {
643 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
644 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
645 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
646 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
647 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
648 } else // SSE immediates.
649 addLegalFPImmediate(APFloat(+0.0f)); // xorps
650 }
651 // Expand FP64 immediates into loads from the stack, save special cases.
652 if (isTypeLegal(MVT::f64)) {
653 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
654 addLegalFPImmediate(APFloat(+0.0)); // FLD0
655 addLegalFPImmediate(APFloat(+1.0)); // FLD1
656 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
657 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
658 } else // SSE immediates.
659 addLegalFPImmediate(APFloat(+0.0)); // xorpd
660 }
661 // Handle constrained floating-point operations of scalar.
662 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
663 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
664 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
665 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
666 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
667 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
668 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
669 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
670 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
671 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
672 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
673 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
674 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
675
676 // We don't support FMA.
677 setOperationAction(ISD::FMA, MVT::f64, Expand);
678 setOperationAction(ISD::FMA, MVT::f32, Expand);
679
680 // f80 always uses X87.
681 if (UseX87) {
682 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
683 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
684 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
685 {
686 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
687 addLegalFPImmediate(TmpFlt); // FLD0
688 TmpFlt.changeSign();
689 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
690
691 bool ignored;
692 APFloat TmpFlt2(+1.0);
693 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
694 &ignored);
695 addLegalFPImmediate(TmpFlt2); // FLD1
696 TmpFlt2.changeSign();
697 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
698 }
699
700 // Always expand sin/cos functions even though x87 has an instruction.
701 setOperationAction(ISD::FSIN , MVT::f80, Expand);
702 setOperationAction(ISD::FCOS , MVT::f80, Expand);
703 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
704
705 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
706 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
707 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
708 setOperationAction(ISD::FRINT, MVT::f80, Expand);
709 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
710 setOperationAction(ISD::FMA, MVT::f80, Expand);
711 setOperationAction(ISD::LROUND, MVT::f80, Expand);
712 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
713 setOperationAction(ISD::LRINT, MVT::f80, Custom);
714 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
715
716 // Handle constrained floating-point operations of scalar.
717 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
718 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
719 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
720 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
721 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
722 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
723 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
724 // as Custom.
725 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
726 }
727
728 // f128 uses xmm registers, but most operations require libcalls.
729 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
730 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
731 : &X86::VR128RegClass);
732
733 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
734
735 setOperationAction(ISD::FADD, MVT::f128, LibCall);
736 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
737 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
738 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
739 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
740 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
741 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
742 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
743 setOperationAction(ISD::FMA, MVT::f128, LibCall);
744 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
745
746 setOperationAction(ISD::FABS, MVT::f128, Custom);
747 setOperationAction(ISD::FNEG, MVT::f128, Custom);
748 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
749
750 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
751 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
752 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
753 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
754 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
755 // No STRICT_FSINCOS
756 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
757 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
758
759 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
760 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
761 // We need to custom handle any FP_ROUND with an f128 input, but
762 // LegalizeDAG uses the result type to know when to run a custom handler.
763 // So we have to list all legal floating point result types here.
764 if (isTypeLegal(MVT::f32)) {
765 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
766 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
767 }
768 if (isTypeLegal(MVT::f64)) {
769 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
770 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
771 }
772 if (isTypeLegal(MVT::f80)) {
773 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
774 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
775 }
776
777 setOperationAction(ISD::SETCC, MVT::f128, Custom);
778
779 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
780 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
781 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
782 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
783 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
784 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
785 }
786
787 // Always use a library call for pow.
788 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
789 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
790 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
791 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
792
793 setOperationAction(ISD::FLOG, MVT::f80, Expand);
794 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
795 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
796 setOperationAction(ISD::FEXP, MVT::f80, Expand);
797 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
798 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
799 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
800
801 // Some FP actions are always expanded for vector types.
802 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
803 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
804 setOperationAction(ISD::FSIN, VT, Expand);
805 setOperationAction(ISD::FSINCOS, VT, Expand);
806 setOperationAction(ISD::FCOS, VT, Expand);
807 setOperationAction(ISD::FREM, VT, Expand);
808 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
809 setOperationAction(ISD::FPOW, VT, Expand);
810 setOperationAction(ISD::FLOG, VT, Expand);
811 setOperationAction(ISD::FLOG2, VT, Expand);
812 setOperationAction(ISD::FLOG10, VT, Expand);
813 setOperationAction(ISD::FEXP, VT, Expand);
814 setOperationAction(ISD::FEXP2, VT, Expand);
815 }
816
817 // First set operation action for all vector types to either promote
818 // (for widening) or expand (for scalarization). Then we will selectively
819 // turn on ones that can be effectively codegen'd.
820 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
821 setOperationAction(ISD::SDIV, VT, Expand);
822 setOperationAction(ISD::UDIV, VT, Expand);
823 setOperationAction(ISD::SREM, VT, Expand);
824 setOperationAction(ISD::UREM, VT, Expand);
825 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
826 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
827 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
828 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
829 setOperationAction(ISD::FMA, VT, Expand);
830 setOperationAction(ISD::FFLOOR, VT, Expand);
831 setOperationAction(ISD::FCEIL, VT, Expand);
832 setOperationAction(ISD::FTRUNC, VT, Expand);
833 setOperationAction(ISD::FRINT, VT, Expand);
834 setOperationAction(ISD::FNEARBYINT, VT, Expand);
835 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
836 setOperationAction(ISD::MULHS, VT, Expand);
837 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
838 setOperationAction(ISD::MULHU, VT, Expand);
839 setOperationAction(ISD::SDIVREM, VT, Expand);
840 setOperationAction(ISD::UDIVREM, VT, Expand);
841 setOperationAction(ISD::CTPOP, VT, Expand);
842 setOperationAction(ISD::CTTZ, VT, Expand);
843 setOperationAction(ISD::CTLZ, VT, Expand);
844 setOperationAction(ISD::ROTL, VT, Expand);
845 setOperationAction(ISD::ROTR, VT, Expand);
846 setOperationAction(ISD::BSWAP, VT, Expand);
847 setOperationAction(ISD::SETCC, VT, Expand);
848 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
849 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
850 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
851 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
852 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
853 setOperationAction(ISD::TRUNCATE, VT, Expand);
854 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
855 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
856 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
857 setOperationAction(ISD::SELECT_CC, VT, Expand);
858 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
859 setTruncStoreAction(InnerVT, VT, Expand);
860
861 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
862 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
863
864 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
865 // types, we have to deal with them whether we ask for Expansion or not.
866 // Setting Expand causes its own optimisation problems though, so leave
867 // them legal.
868 if (VT.getVectorElementType() == MVT::i1)
869 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
870
871 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
872 // split/scalarized right now.
873 if (VT.getVectorElementType() == MVT::f16)
874 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
875 }
876 }
877
878 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
879 // with -msoft-float, disable use of MMX as well.
880 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
881 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
882 // No operations on x86mmx supported, everything uses intrinsics.
883 }
884
885 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
886 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
887 : &X86::VR128RegClass);
888
889 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
890 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
891 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
892 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
893 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
894 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
895 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
896 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
897
898 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
899 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
900
901 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
902 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
903 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
904 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
905 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
906 }
907
908 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
909 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
910 : &X86::VR128RegClass);
911
912 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
913 // registers cannot be used even for integer operations.
914 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
915 : &X86::VR128RegClass);
916 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
917 : &X86::VR128RegClass);
918 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
919 : &X86::VR128RegClass);
920 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
921 : &X86::VR128RegClass);
922
923 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
924 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
925 setOperationAction(ISD::SDIV, VT, Custom);
926 setOperationAction(ISD::SREM, VT, Custom);
927 setOperationAction(ISD::UDIV, VT, Custom);
928 setOperationAction(ISD::UREM, VT, Custom);
929 }
930
931 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
932 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
933 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
934
935 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
936 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
937 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
938 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
939 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
940 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
941 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
942 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
943 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
944 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
945
946 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
947 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
948
949 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
950 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
951 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
952
953 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
954 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
955 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
956 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
957 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
958 }
959
960 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
961 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
962 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
963 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
964 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
965 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
966 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
967 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
968 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
969 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
970
971 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
972 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
973 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
974
975 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
976 setOperationAction(ISD::SETCC, VT, Custom);
977 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
978 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
979 setOperationAction(ISD::CTPOP, VT, Custom);
980 setOperationAction(ISD::ABS, VT, Custom);
981
982 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
983 // setcc all the way to isel and prefer SETGT in some isel patterns.
984 setCondCodeAction(ISD::SETLT, VT, Custom);
985 setCondCodeAction(ISD::SETLE, VT, Custom);
986 }
987
988 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
989 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
990 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
991 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
992 setOperationAction(ISD::VSELECT, VT, Custom);
993 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
994 }
995
996 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
997 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
998 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
999 setOperationAction(ISD::VSELECT, VT, Custom);
1000
1001 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1002 continue;
1003
1004 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1005 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1006 }
1007
1008 // Custom lower v2i64 and v2f64 selects.
1009 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1010 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1011 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
1012 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
1013 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
1014
1015 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1016 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1017 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
1018 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1019
1020 // Custom legalize these to avoid over promotion or custom promotion.
1021 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1022 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1023 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1024 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1025 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1026 }
1027
1028 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1029 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
1030 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1031 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1032
1033 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1034 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1035
1036 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1037 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1038
1039 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1040 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1041 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1042 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1043 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1044
1045 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1046 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1047 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1048 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1049
1050 // We want to legalize this to an f64 load rather than an i64 load on
1051 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1052 // store.
1053 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1054 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1055 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1056 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1057 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1058 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1059
1060 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1061 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1062 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1063 if (!Subtarget.hasAVX512())
1064 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1065
1066 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1067 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1068 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1069
1070 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1071
1072 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1073 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1074 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1075 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1076 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1077 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1078
1079 // In the customized shift lowering, the legal v4i32/v2i64 cases
1080 // in AVX2 will be recognized.
1081 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1082 setOperationAction(ISD::SRL, VT, Custom);
1083 setOperationAction(ISD::SHL, VT, Custom);
1084 setOperationAction(ISD::SRA, VT, Custom);
1085 }
1086
1087 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1088 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1089
1090 // With 512-bit registers or AVX512VL+BW, expanding (and promoting the
1091 // shifts) is better.
1092 if (!Subtarget.useAVX512Regs() &&
1093 !(Subtarget.hasBWI() && Subtarget.hasVLX()))
1094 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1095
1096 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1097 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1098 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1099 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1100 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1101 }
1102
1103 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1104 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1105 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1106 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1107 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1108 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1109 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1110 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1111 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1112
1113 // These might be better off as horizontal vector ops.
1114 setOperationAction(ISD::ADD, MVT::i16, Custom);
1115 setOperationAction(ISD::ADD, MVT::i32, Custom);
1116 setOperationAction(ISD::SUB, MVT::i16, Custom);
1117 setOperationAction(ISD::SUB, MVT::i32, Custom);
1118 }
1119
1120 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1121 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1122 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1123 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1124 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1125 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1126 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1127 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1128 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1129 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1130 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1131 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1132 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1133 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1134
1135 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1136 }
1137
1138 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1139 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1140 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1141 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1142 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1143 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1144 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1145 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1146
1147 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1148
1149 // FIXME: Do we need to handle scalar-to-vector here?
1150 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1151
1152 // We directly match byte blends in the backend as they match the VSELECT
1153 // condition form.
1154 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1155
1156 // SSE41 brings specific instructions for doing vector sign extend even in
1157 // cases where we don't have SRA.
1158 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1159 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1160 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1161 }
1162
1163 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1164 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1165 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1166 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1167 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1168 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1169 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1170 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1171 }
1172
1173 // i8 vectors are custom because the source register and source
 1174 // memory operand types are not the same width.
1175 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1176
1177 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
 1178 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1179 // do the pre and post work in the vector domain.
1180 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1181 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1182 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1183 // so that DAG combine doesn't try to turn it into uint_to_fp.
1184 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1185 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1186 }
1187 }
1188
1189 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1190 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1191 }
1192
1193 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1194 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1195 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1196 setOperationAction(ISD::ROTL, VT, Custom);
1197
1198 // XOP can efficiently perform BITREVERSE with VPPERM.
1199 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1200 setOperationAction(ISD::BITREVERSE, VT, Custom);
1201
1202 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1203 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1204 setOperationAction(ISD::BITREVERSE, VT, Custom);
1205 }
1206
1207 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1208 bool HasInt256 = Subtarget.hasInt256();
1209
1210 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1211 : &X86::VR256RegClass);
1212 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1213 : &X86::VR256RegClass);
1214 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1215 : &X86::VR256RegClass);
1216 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1217 : &X86::VR256RegClass);
1218 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1219 : &X86::VR256RegClass);
1220 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1221 : &X86::VR256RegClass);
1222
1223 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1224 setOperationAction(ISD::FFLOOR, VT, Legal);
1225 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1226 setOperationAction(ISD::FCEIL, VT, Legal);
1227 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1228 setOperationAction(ISD::FTRUNC, VT, Legal);
1229 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1230 setOperationAction(ISD::FRINT, VT, Legal);
1231 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1232 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1233 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1234 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1235 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1236
1237 setOperationAction(ISD::FROUND, VT, Custom);
1238
1239 setOperationAction(ISD::FNEG, VT, Custom);
1240 setOperationAction(ISD::FABS, VT, Custom);
1241 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1242 }
1243
1244 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1245 // even though v8i16 is a legal type.
1246 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1247 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1248 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1249 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1250 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1251 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
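    // (setOperationPromotedToType is, roughly, shorthand for
    //    setOperationAction(Opc, OrigVT, Promote);
    //    AddPromotedToType(Opc, OrigVT, DestVT);
    //  so the v8i16 results above are computed as v8i32 and then truncated.)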
1252
1253 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1254 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1255
1256 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1257 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1258 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1259 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1260 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1261 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1262 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1263 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1264 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1265 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1266 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1267 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1268
1269 if (!Subtarget.hasAVX512())
1270 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1271
1272 // In the customized shift lowering, the legal v8i32/v4i64 cases
1273 // in AVX2 will be recognized.
1274 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1275 setOperationAction(ISD::SRL, VT, Custom);
1276 setOperationAction(ISD::SHL, VT, Custom);
1277 setOperationAction(ISD::SRA, VT, Custom);
1278 }
1279
1280 // These types need custom splitting if their input is a 128-bit vector.
1281 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1282 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1283 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1284 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1285
1286 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1287 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1288
1289 // With BWI, expanding (and promoting the shifts) is better.
1290 if (!Subtarget.useBWIRegs())
1291 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1292
1293 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1294 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1295 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1296 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1297 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1298 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1299
1300 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1301 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1302 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1303 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1304 }
1305
1306 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1307 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1308 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1309 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1310
1311 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1312 setOperationAction(ISD::SETCC, VT, Custom);
1313 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1314 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1315 setOperationAction(ISD::CTPOP, VT, Custom);
1316 setOperationAction(ISD::CTLZ, VT, Custom);
1317
1318 // The condition codes aren't legal in SSE/AVX, and under AVX512 we use
1319 // setcc all the way to isel and prefer SETGT in some isel patterns.
1320 setCondCodeAction(ISD::SETLT, VT, Custom);
1321 setCondCodeAction(ISD::SETLE, VT, Custom);
1322 }
1323
1324 if (Subtarget.hasAnyFMA()) {
1325 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1326 MVT::v2f64, MVT::v4f64 }) {
1327 setOperationAction(ISD::FMA, VT, Legal);
1328 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1329 }
1330 }
1331
1332 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1333 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1334 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1335 }
1336
1337 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1338 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1339 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1340 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1341
1342 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1343 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1344 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1345 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1346 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1347 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1348
1349 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1350 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1351
1352 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1353 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1354 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1355 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1356 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1357
1358 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1359 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1360 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1361 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1362 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1363 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1364 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1365 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1366 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1367 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1368 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1369 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1370
1371 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1372 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1373 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1374 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1375 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1376 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1377 }
1378
1379 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1380 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1381 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1382 }
1383
1384 if (HasInt256) {
1385 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1386 // when we have a 256-bit-wide blend with immediate.
1387 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1388 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1389
1390 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1391 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1392 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1393 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1394 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1395 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1396 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1397 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1398 }
1399 }
1400
1401 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1402 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1403 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1404 setOperationAction(ISD::MSTORE, VT, Legal);
1405 }
1406
1407 // Extract subvector is special because the value type
1408 // (result) is 128-bit but the source is 256-bit wide.
1409 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1410 MVT::v4f32, MVT::v2f64 }) {
1411 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1412 }
1413
1414 // Custom lower several nodes for 256-bit types.
1415 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1416 MVT::v8f32, MVT::v4f64 }) {
1417 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1418 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1419 setOperationAction(ISD::VSELECT, VT, Custom);
1420 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1421 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1422 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1423 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1424 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1425 setOperationAction(ISD::STORE, VT, Custom);
1426 }
1427
1428 if (HasInt256) {
1429 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1430
1431 // Custom legalize 2x32 to get a little better code.
1432 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1433 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1434
1435 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1436 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1437 setOperationAction(ISD::MGATHER, VT, Custom);
1438 }
1439 }
1440
1441 // This block controls legalization of the mask vector sizes that are
1442 // available with AVX512. 512-bit vectors are in a separate block controlled
1443 // by useAVX512Regs.
1444 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1445 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1446 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1447 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1448 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1449 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1450
1451 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1452 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1453 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1454
1455 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1456 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1457 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1458 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1459 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1460 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1461 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1462 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1463 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1464 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1465 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1466 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1467
1468 // There is no byte sized k-register load or store without AVX512DQ.
1469 if (!Subtarget.hasDQI()) {
1470 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1471 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1472 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1473 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1474
1475 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1476 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1477 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1478 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1479 }
1480
1481 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1482 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1483 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1484 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1485 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1486 }
1487
1488 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1489 setOperationAction(ISD::VSELECT, VT, Expand);
1490
1491 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1492 setOperationAction(ISD::SETCC, VT, Custom);
1493 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1494 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1495 setOperationAction(ISD::SELECT, VT, Custom);
1496 setOperationAction(ISD::TRUNCATE, VT, Custom);
1497
1498 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1499 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1500 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1501 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1502 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1503 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1504 }
1505
1506 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1507 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1508 }
1509
1510 // This block controls legalization for 512-bit operations with 32/64 bit
1511 // elements. 512-bits can be disabled based on prefer-vector-width and
1512 // required-vector-width function attributes.
1513 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1514 bool HasBWI = Subtarget.hasBWI();
1515
1516 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1517 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1518 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1519 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1520 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1521 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1522
1523 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1524 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1525 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1526 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1527 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1528 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1529 if (HasBWI)
1530 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1531 }
1532
1533 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1534 setOperationAction(ISD::FNEG, VT, Custom);
1535 setOperationAction(ISD::FABS, VT, Custom);
1536 setOperationAction(ISD::FMA, VT, Legal);
1537 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1538 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1539 }
1540
1541 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1542 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1543 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1544 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1545 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1546 }
1547 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1548 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1549 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1550 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1551 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1552 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1553 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1554 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1555
1556 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1557 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1558 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1559 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1560 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1561 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1562 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1563 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1564 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1565 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1566 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1567 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1568
1569 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1570 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1571 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1572 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1573 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1574 if (HasBWI)
1575 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1576
1577 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1578 // to 512-bit rather than use the AVX2 instructions so that we can use
1579 // k-masks.
1580 if (!Subtarget.hasVLX()) {
1581 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1582 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1583 setOperationAction(ISD::MLOAD, VT, Custom);
1584 setOperationAction(ISD::MSTORE, VT, Custom);
1585 }
1586 }
1587
1588 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1589 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1590 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1591 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1592 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1593 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1594 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1595 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1596 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1597 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1598 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1599 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1600 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1601
1602 if (HasBWI) {
1603 // Extends from v64i1 masks to 512-bit vectors.
1604 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1605 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1606 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1607 }
1608
1609 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1610 setOperationAction(ISD::FFLOOR, VT, Legal);
1611 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1612 setOperationAction(ISD::FCEIL, VT, Legal);
1613 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1614 setOperationAction(ISD::FTRUNC, VT, Legal);
1615 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1616 setOperationAction(ISD::FRINT, VT, Legal);
1617 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1618 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1619 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1620 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1621 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1622
1623 setOperationAction(ISD::FROUND, VT, Custom);
1624 }
1625
1626 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1627 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1628 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1629 }
1630
1631 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1632 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1633 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1634 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1635
1636 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1637 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1638 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1639 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1640
1641 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1642 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1643 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1644 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1645 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1646 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1647
1648 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1649 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1650
1651 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1652
1653 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1654 setOperationAction(ISD::SRL, VT, Custom);
1655 setOperationAction(ISD::SHL, VT, Custom);
1656 setOperationAction(ISD::SRA, VT, Custom);
1657 setOperationAction(ISD::SETCC, VT, Custom);
1658
1659 // The condition codes aren't legal in SSE/AVX, and under AVX512 we use
1660 // setcc all the way to isel and prefer SETGT in some isel patterns.
1661 setCondCodeAction(ISD::SETLT, VT, Custom);
1662 setCondCodeAction(ISD::SETLE, VT, Custom);
1663 }
1664 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1665 setOperationAction(ISD::SMAX, VT, Legal);
1666 setOperationAction(ISD::UMAX, VT, Legal);
1667 setOperationAction(ISD::SMIN, VT, Legal);
1668 setOperationAction(ISD::UMIN, VT, Legal);
1669 setOperationAction(ISD::ABS, VT, Legal);
1670 setOperationAction(ISD::CTPOP, VT, Custom);
1671 setOperationAction(ISD::ROTL, VT, Custom);
1672 setOperationAction(ISD::ROTR, VT, Custom);
1673 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1674 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1675 }
1676
1677 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1678 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1679 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1680 setOperationAction(ISD::CTLZ, VT, Custom);
1681 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1682 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1683 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1684 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1685 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1686 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1687 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1688 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1689 }
1690
1691 if (Subtarget.hasDQI()) {
1692 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1693 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1694 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1695 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1696 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1697 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1698 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1699 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1700
1701 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1702 }
1703
1704 if (Subtarget.hasCDI()) {
1705 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1706 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1707 setOperationAction(ISD::CTLZ, VT, Legal);
1708 }
1709 } // Subtarget.hasCDI()
1710
1711 if (Subtarget.hasVPOPCNTDQ()) {
1712 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1713 setOperationAction(ISD::CTPOP, VT, Legal);
1714 }
1715
1716 // Extract subvector is special because the value type
1717 // (result) is 256-bit but the source is 512-bit wide.
1718 // 128-bit was made Legal under AVX1.
1719 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1720 MVT::v8f32, MVT::v4f64 })
1721 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1722
1723 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1724 MVT::v16f32, MVT::v8f64 }) {
1725 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1726 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1727 setOperationAction(ISD::SELECT, VT, Custom);
1728 setOperationAction(ISD::VSELECT, VT, Custom);
1729 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1730 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1731 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1732 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1733 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1734 }
1735
1736 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1737 setOperationAction(ISD::MLOAD, VT, Legal);
1738 setOperationAction(ISD::MSTORE, VT, Legal);
1739 setOperationAction(ISD::MGATHER, VT, Custom);
1740 setOperationAction(ISD::MSCATTER, VT, Custom);
1741 }
1742 if (HasBWI) {
1743 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1744 setOperationAction(ISD::MLOAD, VT, Legal);
1745 setOperationAction(ISD::MSTORE, VT, Legal);
1746 }
1747 } else {
1748 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1749 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1750 }
1751
1752 if (Subtarget.hasVBMI2()) {
1753 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1754 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1755 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1756 setOperationAction(ISD::FSHL, VT, Custom);
1757 setOperationAction(ISD::FSHR, VT, Custom);
1758 }
1759
1760 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1761 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1762 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1763 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1764 }
1765 } // useAVX512Regs
1766
1767 // This block controls legalization for operations that don't have
1768 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1769 // narrower widths.
1770 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1771 // These operations are handled on non-VLX by artificially widening in
1772 // isel patterns.
1773
1774 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1775 Subtarget.hasVLX() ? Legal : Custom);
1776 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1777 Subtarget.hasVLX() ? Legal : Custom);
1778 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1779 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1780 Subtarget.hasVLX() ? Legal : Custom);
1781 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1782 Subtarget.hasVLX() ? Legal : Custom);
1783 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1784 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1785 Subtarget.hasVLX() ? Legal : Custom);
1786 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1787 Subtarget.hasVLX() ? Legal : Custom);
1788 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1789 Subtarget.hasVLX() ? Legal : Custom);
1790 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1791 Subtarget.hasVLX() ? Legal : Custom);
1792
1793 if (Subtarget.hasDQI()) {
1794 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1795 // v2f32 UINT_TO_FP is already custom under SSE2.
1796      assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
1797             isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
1798             "Unexpected operation action!");
1799 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1800 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1801 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1802 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1803 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1804 }
1805
1806 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1807 setOperationAction(ISD::SMAX, VT, Legal);
1808 setOperationAction(ISD::UMAX, VT, Legal);
1809 setOperationAction(ISD::SMIN, VT, Legal);
1810 setOperationAction(ISD::UMIN, VT, Legal);
1811 setOperationAction(ISD::ABS, VT, Legal);
1812 }
1813
1814 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1815 setOperationAction(ISD::ROTL, VT, Custom);
1816 setOperationAction(ISD::ROTR, VT, Custom);
1817 }
1818
1819 // Custom legalize 2x32 to get a little better code.
1820 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1821 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1822
1823 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1824 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1825 setOperationAction(ISD::MSCATTER, VT, Custom);
1826
1827 if (Subtarget.hasDQI()) {
1828 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1829 setOperationAction(ISD::SINT_TO_FP, VT,
1830 Subtarget.hasVLX() ? Legal : Custom);
1831 setOperationAction(ISD::UINT_TO_FP, VT,
1832 Subtarget.hasVLX() ? Legal : Custom);
1833 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1834 Subtarget.hasVLX() ? Legal : Custom);
1835 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1836 Subtarget.hasVLX() ? Legal : Custom);
1837 setOperationAction(ISD::FP_TO_SINT, VT,
1838 Subtarget.hasVLX() ? Legal : Custom);
1839 setOperationAction(ISD::FP_TO_UINT, VT,
1840 Subtarget.hasVLX() ? Legal : Custom);
1841 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1842 Subtarget.hasVLX() ? Legal : Custom);
1843 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1844 Subtarget.hasVLX() ? Legal : Custom);
1845 setOperationAction(ISD::MUL, VT, Legal);
1846 }
1847 }
1848
1849 if (Subtarget.hasCDI()) {
1850 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1851 setOperationAction(ISD::CTLZ, VT, Legal);
1852 }
1853 } // Subtarget.hasCDI()
1854
1855 if (Subtarget.hasVPOPCNTDQ()) {
1856 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1857 setOperationAction(ISD::CTPOP, VT, Legal);
1858 }
1859 }
1860
1861 // This block controls legalization of v32i1/v64i1 which are available with
1862 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1863 // useBWIRegs.
1864 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1865 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1866 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1867
1868 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1869 setOperationAction(ISD::VSELECT, VT, Expand);
1870 setOperationAction(ISD::TRUNCATE, VT, Custom);
1871 setOperationAction(ISD::SETCC, VT, Custom);
1872 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1873 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1874 setOperationAction(ISD::SELECT, VT, Custom);
1875 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1876 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1877 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1878 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1879 }
1880
1881 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1882 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1883
1884 // Extends from v32i1 masks to 256-bit vectors.
1885 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1886 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1887 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1888
1889 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1890 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1891 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1892 }
1893
1894 // These operations are handled on non-VLX by artificially widening in
1895 // isel patterns.
1896 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1897
1898 if (Subtarget.hasBITALG()) {
1899 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1900 setOperationAction(ISD::CTPOP, VT, Legal);
1901 }
1902 }
1903
1904 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1905 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1906 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1907 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1908 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1909 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1910
1911 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1912 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1913 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1914 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1915 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1916
1917 if (Subtarget.hasBWI()) {
1918 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1919 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1920 }
1921
1922 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
1923 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
1924 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1925 }
1926
1927 if (Subtarget.hasAMXTILE()) {
1928 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
1929 }
1930
1931 // We want to custom lower some of our intrinsics.
1932 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1933 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1934 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1935 if (!Subtarget.is64Bit()) {
1936 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1937 }
1938
1939 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1940 // handle type legalization for these operations here.
1941 //
1942 // FIXME: We really should do custom legalization for addition and
1943 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1944 // than generic legalization for 64-bit multiplication-with-overflow, though.
1945 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1946 if (VT == MVT::i64 && !Subtarget.is64Bit())
1947 continue;
1948 // Add/Sub/Mul with overflow operations are custom lowered.
1949 setOperationAction(ISD::SADDO, VT, Custom);
1950 setOperationAction(ISD::UADDO, VT, Custom);
1951 setOperationAction(ISD::SSUBO, VT, Custom);
1952 setOperationAction(ISD::USUBO, VT, Custom);
1953 setOperationAction(ISD::SMULO, VT, Custom);
1954 setOperationAction(ISD::UMULO, VT, Custom);
1955
1956 // Support carry in as value rather than glue.
1957 setOperationAction(ISD::ADDCARRY, VT, Custom);
1958 setOperationAction(ISD::SUBCARRY, VT, Custom);
1959 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1960 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
1961 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
1962 }
1963
1964 if (!Subtarget.is64Bit()) {
1965 // These libcalls are not available in 32-bit.
1966 setLibcallName(RTLIB::SHL_I128, nullptr);
1967 setLibcallName(RTLIB::SRL_I128, nullptr);
1968 setLibcallName(RTLIB::SRA_I128, nullptr);
1969 setLibcallName(RTLIB::MUL_I128, nullptr);
1970 }
1971
1972 // Combine sin / cos into _sincos_stret if it is available.
1973 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1974 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1975 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1976 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1977 }
1978
1979 if (Subtarget.isTargetWin64()) {
1980 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1981 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1982 setOperationAction(ISD::SREM, MVT::i128, Custom);
1983 setOperationAction(ISD::UREM, MVT::i128, Custom);
1984 }
1985
1986 // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1987 // is. We should promote the value to 64 bits to solve this.
1988 // This is what the CRT headers do - `fmodf` is an inline header
1989 // function casting to f64 and calling `fmod`.
1990 if (Subtarget.is32Bit() &&
1991 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
1992 for (ISD::NodeType Op :
1993 {ISD::FCEIL, ISD::STRICT_FCEIL,
1994 ISD::FCOS, ISD::STRICT_FCOS,
1995 ISD::FEXP, ISD::STRICT_FEXP,
1996 ISD::FFLOOR, ISD::STRICT_FFLOOR,
1997 ISD::FREM, ISD::STRICT_FREM,
1998 ISD::FLOG, ISD::STRICT_FLOG,
1999 ISD::FLOG10, ISD::STRICT_FLOG10,
2000 ISD::FPOW, ISD::STRICT_FPOW,
2001 ISD::FSIN, ISD::STRICT_FSIN})
2002 if (isOperationExpand(Op, MVT::f32))
2003 setOperationAction(Op, MVT::f32, Promote);
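  // (Promote here means the f32 node is legalized by widening to f64, e.g.
  //  an f32 FREM becomes fptrunc(fmod(fpext(x), fpext(y))), so it reaches
  //  the double-precision libcall just like the CRT's inline fmodf.)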
2004
2005 // We have target-specific dag combine patterns for the following nodes:
2006 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
2007 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
2008 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
2009 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
2010 setTargetDAGCombine(ISD::CONCAT_VECTORS);
2011 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
2012 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
2013 setTargetDAGCombine(ISD::BITCAST);
2014 setTargetDAGCombine(ISD::VSELECT);
2015 setTargetDAGCombine(ISD::SELECT);
2016 setTargetDAGCombine(ISD::SHL);
2017 setTargetDAGCombine(ISD::SRA);
2018 setTargetDAGCombine(ISD::SRL);
2019 setTargetDAGCombine(ISD::OR);
2020 setTargetDAGCombine(ISD::AND);
2021 setTargetDAGCombine(ISD::ADD);
2022 setTargetDAGCombine(ISD::FADD);
2023 setTargetDAGCombine(ISD::FSUB);
2024 setTargetDAGCombine(ISD::FNEG);
2025 setTargetDAGCombine(ISD::FMA);
2026 setTargetDAGCombine(ISD::STRICT_FMA);
2027 setTargetDAGCombine(ISD::FMINNUM);
2028 setTargetDAGCombine(ISD::FMAXNUM);
2029 setTargetDAGCombine(ISD::SUB);
2030 setTargetDAGCombine(ISD::LOAD);
2031 setTargetDAGCombine(ISD::MLOAD);
2032 setTargetDAGCombine(ISD::STORE);
2033 setTargetDAGCombine(ISD::MSTORE);
2034 setTargetDAGCombine(ISD::TRUNCATE);
2035 setTargetDAGCombine(ISD::ZERO_EXTEND);
2036 setTargetDAGCombine(ISD::ANY_EXTEND);
2037 setTargetDAGCombine(ISD::SIGN_EXTEND);
2038 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
2039 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
2040 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
2041 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
2042 setTargetDAGCombine(ISD::SINT_TO_FP);
2043 setTargetDAGCombine(ISD::UINT_TO_FP);
2044 setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
2045 setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
2046 setTargetDAGCombine(ISD::SETCC);
2047 setTargetDAGCombine(ISD::MUL);
2048 setTargetDAGCombine(ISD::XOR);
2049 setTargetDAGCombine(ISD::MSCATTER);
2050 setTargetDAGCombine(ISD::MGATHER);
2051 setTargetDAGCombine(ISD::FP16_TO_FP);
2052 setTargetDAGCombine(ISD::FP_EXTEND);
2053 setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
2054 setTargetDAGCombine(ISD::FP_ROUND);
2055
2056 computeRegisterProperties(Subtarget.getRegisterInfo());
2057
2058 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2059 MaxStoresPerMemsetOptSize = 8;
2060 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2061 MaxStoresPerMemcpyOptSize = 4;
2062 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2063 MaxStoresPerMemmoveOptSize = 4;
2064
2065 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2066 // that needs to be benchmarked and balanced with the potential use of vector
2067 // load/store types (PR33329, PR33914).
2068 MaxLoadsPerMemcmp = 2;
2069 MaxLoadsPerMemcmpOptSize = 2;
2070
2071 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
2072 setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
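  // (With the default value of 4 this is Align(1ULL << 4) == Align(16),
  //  i.e. 16-byte loop alignment.)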
2073
2074 // An out-of-order CPU can speculatively execute past a predictable branch,
2075 // but a conditional move could be stalled by an expensive earlier operation.
2076 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2077 EnableExtLdPromotion = true;
2078 setPrefFunctionAlignment(Align(16));
2079
2080 verifyIntrinsicTables();
2081
2082 // Default to having -disable-strictnode-mutation on
2083 IsStrictFPEnabled = true;
2084}
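// A Custom action above routes the node to the target's LowerOperation hook
// during DAG legalization. A minimal sketch of that dispatch, using a
// hypothetical MyTargetLowering and hypothetical helpers rather than the
// actual X86 code in this file:
//
//   SDValue MyTargetLowering::LowerOperation(SDValue Op,
//                                            SelectionDAG &DAG) const {
//     switch (Op.getOpcode()) {
//     default: llvm_unreachable("Should not custom lower this!");
//     case ISD::FROUND:  return LowerFROUND(Op, DAG);   // hypothetical
//     case ISD::UADDSAT: return LowerUADDSAT(Op, DAG);  // hypothetical
//     }
//   }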
2085
2086// This has so far only been implemented for 64-bit MachO.
2087bool X86TargetLowering::useLoadStackGuardNode() const {
2088 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2089}
2090
2091bool X86TargetLowering::useStackGuardXorFP() const {
2092 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2093 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2094}
2095
2096SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2097 const SDLoc &DL) const {
2098 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2099 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2100 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2101 return SDValue(Node, 0);
2102}
2103
2104TargetLoweringBase::LegalizeTypeAction
2105X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2106 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2107 !Subtarget.hasBWI())
2108 return TypeSplitVector;
2109
2110 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2111 VT.getVectorElementType() != MVT::i1)
2112 return TypeWidenVector;
2113
2114 return TargetLoweringBase::getPreferredVectorAction(VT);
2115}
2116
2117static std::pair<MVT, unsigned>
2118handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2119 const X86Subtarget &Subtarget) {
2120 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2121 // convention is one that uses k registers.
2122 if (NumElts == 2)
2123 return {MVT::v2i64, 1};
2124 if (NumElts == 4)
2125 return {MVT::v4i32, 1};
2126 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2127 CC != CallingConv::Intel_OCL_BI)
2128 return {MVT::v8i16, 1};
2129 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2130 CC != CallingConv::Intel_OCL_BI)
2131 return {MVT::v16i8, 1};
2132 // v32i1 passes in ymm unless we have BWI and the calling convention is
2133 // regcall.
2134 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2135 return {MVT::v32i8, 1};
2136 // Split v64i1 vectors if we don't have v64i8 available.
2137 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2138 if (Subtarget.useAVX512Regs())
2139 return {MVT::v64i8, 1};
2140 return {MVT::v32i8, 2};
2141 }
2142
2143 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2144 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2145 NumElts > 64)
2146 return {MVT::i8, NumElts};
2147
2148 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2149}
2150
2151MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2152 CallingConv::ID CC,
2153 EVT VT) const {
2154 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2155 Subtarget.hasAVX512()) {
2156 unsigned NumElts = VT.getVectorNumElements();
2157
2158 MVT RegisterVT;
2159 unsigned NumRegisters;
2160 std::tie(RegisterVT, NumRegisters) =
2161 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2162 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2163 return RegisterVT;
2164 }
2165
2166 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2167}
2168
2169unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2170 CallingConv::ID CC,
2171 EVT VT) const {
2172 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2173 Subtarget.hasAVX512()) {
2174 unsigned NumElts = VT.getVectorNumElements();
2175
2176 MVT RegisterVT;
2177 unsigned NumRegisters;
2178 std::tie(RegisterVT, NumRegisters) =
2179 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2180 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2181 return NumRegisters;
2182 }
2183
2184 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2185}
2186
2187unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2188 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2189 unsigned &NumIntermediates, MVT &RegisterVT) const {
2190 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2191 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2192 Subtarget.hasAVX512() &&
2193 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2194 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2195 VT.getVectorNumElements() > 64)) {
2196 RegisterVT = MVT::i8;
2197 IntermediateVT = MVT::i1;
2198 NumIntermediates = VT.getVectorNumElements();
2199 return NumIntermediates;
2200 }
2201
2202 // Split v64i1 vectors if we don't have v64i8 available.
2203 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2204 CC != CallingConv::X86_RegCall) {
2205 RegisterVT = MVT::v32i8;
2206 IntermediateVT = MVT::v32i1;
2207 NumIntermediates = 2;
2208 return 2;
2209 }
2210
2211 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2212 NumIntermediates, RegisterVT);
2213}
2214
2215EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2216 LLVMContext& Context,
2217 EVT VT) const {
2218 if (!VT.isVector())
2219 return MVT::i8;
2220
2221 if (Subtarget.hasAVX512()) {
2222 // Figure out what this type will be legalized to.
2223 EVT LegalVT = VT;
2224 while (getTypeAction(Context, LegalVT) != TypeLegal)
2225 LegalVT = getTypeToTransformTo(Context, LegalVT);
2226
2227 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2228 if (LegalVT.getSimpleVT().is512BitVector())
2229 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2230
2231 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2232 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2233 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2234 // vXi16/vXi8.
2235 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2236 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2237 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2238 }
2239 }
2240
2241 return VT.changeVectorElementTypeToInteger();
2242}
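// (For example, with AVX512VL a setcc on v8f32 reports v8i1 here, while
//  without AVX512 the same compare falls through to v8i32 via
//  changeVectorElementTypeToInteger().)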
2243
2244/// Helper for getByValTypeAlignment to determine
2245/// the desired ByVal argument alignment.
2246static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2247 if (MaxAlign == 16)
2248 return;
2249 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2250 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2251 MaxAlign = Align(16);
2252 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2253 Align EltAlign;
2254 getMaxByValAlign(ATy->getElementType(), EltAlign);
2255 if (EltAlign > MaxAlign)
2256 MaxAlign = EltAlign;
2257 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2258 for (auto *EltTy : STy->elements()) {
2259 Align EltAlign;
2260 getMaxByValAlign(EltTy, EltAlign);
2261 if (EltAlign > MaxAlign)
2262 MaxAlign = EltAlign;
2263 if (MaxAlign == 16)
2264 break;
2265 }
2266 }
2267}
2268
2269/// Return the desired alignment for ByVal aggregate
2270/// function arguments in the caller parameter area. For X86, aggregates
2271/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2272/// are at 4-byte boundaries.
2273unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2274 const DataLayout &DL) const {
2275 if (Subtarget.is64Bit()) {
2276 // Max of 8 and alignment of type.
2277 Align TyAlign = DL.getABITypeAlign(Ty);
2278 if (TyAlign > 8)
2279 return TyAlign.value();
2280 return 8;
2281 }
2282
2283 Align Alignment(4);
2284 if (Subtarget.hasSSE1())
2285 getMaxByValAlign(Ty, Alignment);
2286 return Alignment.value();
2287}
2288
2289/// It returns EVT::Other if the type should be determined using generic
2290/// target-independent logic.
2291/// For vector ops we check that the overall size isn't larger than our
2292/// preferred vector width.
2293EVT X86TargetLowering::getOptimalMemOpType(
2294 const MemOp &Op, const AttributeList &FuncAttributes) const {
2295 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2296 if (Op.size() >= 16 &&
2297 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2298 // FIXME: Check if unaligned 64-byte accesses are slow.
2299 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2300 (Subtarget.getPreferVectorWidth() >= 512)) {
2301 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2302 }
2303 // FIXME: Check if unaligned 32-byte accesses are slow.
2304 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2305 (Subtarget.getPreferVectorWidth() >= 256)) {
2306 // Although this isn't a well-supported type for AVX1, we'll let
2307 // legalization and shuffle lowering produce the optimal codegen. If we
2308 // choose an optimal type with a vector element larger than a byte,
2309 // getMemsetStores() may create an intermediate splat (using an integer
2310 // multiply) before we splat as a vector.
2311 return MVT::v32i8;
2312 }
2313 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2314 return MVT::v16i8;
2315 // TODO: Can SSE1 handle a byte vector?
2316 // If we have SSE1 registers we should be able to use them.
2317 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2318 (Subtarget.getPreferVectorWidth() >= 128))
2319 return MVT::v4f32;
2320 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2321 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2322 // Do not use f64 to lower memcpy if the source is a string constant. It's
2323 // better to use i32 to avoid the loads.
2324 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2325 // The gymnastics of splatting a byte value into an XMM register and then
2326 // only using 8-byte stores (because this is a CPU with slow unaligned
2327 // 16-byte accesses) makes that a loser.
2328 return MVT::f64;
2329 }
2330 }
2331 // This is a compromise. If we reach here, unaligned accesses may be slow on
2332 // this target. However, creating smaller, aligned accesses could be even
2333 // slower and would certainly be a lot more code.
2334 if (Subtarget.is64Bit() && Op.size() >= 8)
2335 return MVT::i64;
2336 return MVT::i32;
2337}
2338
2339bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2340 if (VT == MVT::f32)
2341 return X86ScalarSSEf32;
2342 if (VT == MVT::f64)
2343 return X86ScalarSSEf64;
2344 return true;
2345}
2346
2347bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2348 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
2349 bool *Fast) const {
2350 if (Fast) {
2351 switch (VT.getSizeInBits()) {
2352 default:
2353 // 8-byte and under are always assumed to be fast.
2354 *Fast = true;
2355 break;
2356 case 128:
2357 *Fast = !Subtarget.isUnalignedMem16Slow();
2358 break;
2359 case 256:
2360 *Fast = !Subtarget.isUnalignedMem32Slow();
2361 break;
2362 // TODO: What about AVX-512 (512-bit) accesses?
2363 }
2364 }
2365 // NonTemporal vector memory ops must be aligned.
2366 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2367 // NT loads can only be vector aligned, so if it's less aligned than the
2368 // minimum vector size (which we can split the vector down to), we might as
2369 // well use a regular unaligned vector load.
2370 // We don't have any NT loads pre-SSE41.
2371 if (!!(Flags & MachineMemOperand::MOLoad))
2372 return (Alignment < 16 || !Subtarget.hasSSE41());
2373 return false;
2374 }
2375 // Misaligned accesses of any size are always allowed.
2376 return true;
2377}
2378
2379/// Return the entry encoding for a jump table in the
2380/// current function. The returned value is a member of the
2381/// MachineJumpTableInfo::JTEntryKind enum.
2382unsigned X86TargetLowering::getJumpTableEncoding() const {
2383 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2384 // symbol.
2385 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2386 return MachineJumpTableInfo::EK_Custom32;
2387
2388 // Otherwise, use the normal jump table encoding heuristics.
2389 return TargetLowering::getJumpTableEncoding();
2390}
2391
2392bool X86TargetLowering::useSoftFloat() const {
2393 return Subtarget.useSoftFloat();
2394}
2395
2396void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2397 ArgListTy &Args) const {
2398
2399 // Only relabel X86-32 for C / Stdcall CCs.
2400 if (Subtarget.is64Bit())
2401 return;
2402 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2403 return;
2404 unsigned ParamRegs = 0;
2405 if (auto *M = MF->getFunction().getParent())
2406 ParamRegs = M->getNumberRegisterParameters();
2407
2408 // Mark the first N int arguments as being passed in registers.
2409 for (auto &Arg : Args) {
2410 Type *T = Arg.Ty;
2411 if (T->isIntOrPtrTy())
2412 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2413 unsigned numRegs = 1;
2414 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2415 numRegs = 2;
2416 if (ParamRegs < numRegs)
2417 return;
2418 ParamRegs -= numRegs;
2419 Arg.IsInReg = true;
2420 }
2421 }
2422}
2423
2424const MCExpr *
2425X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2426 const MachineBasicBlock *MBB,
2427 unsigned uid,MCContext &Ctx) const{
2428  assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
2429 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2430 // entries.
2431 return MCSymbolRefExpr::create(MBB->getSymbol(),
2432 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2433}
2434
2435/// Returns relocation base for the given PIC jumptable.
2436SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2437 SelectionDAG &DAG) const {
2438 if (!Subtarget.is64Bit())
2439 // This doesn't have SDLoc associated with it, but is not really the
2440 // same as a Register.
2441 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2442 getPointerTy(DAG.getDataLayout()));
2443 return Table;
2444}
2445
2446/// This returns the relocation base for the given PIC jumptable,
2447/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2448const MCExpr *X86TargetLowering::
2449getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2450 MCContext &Ctx) const {
2451 // X86-64 uses RIP relative addressing based on the jump table label.
2452 if (Subtarget.isPICStyleRIPRel())
2453 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2454
2455 // Otherwise, the reference is relative to the PIC base.
2456 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2457}
2458
2459std::pair<const TargetRegisterClass *, uint8_t>
2460X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2461 MVT VT) const {
2462 const TargetRegisterClass *RRC = nullptr;
2463 uint8_t Cost = 1;
2464 switch (VT.SimpleTy) {
2465 default:
2466 return TargetLowering::findRepresentativeClass(TRI, VT);
2467 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2468 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2469 break;
2470 case MVT::x86mmx:
2471 RRC = &X86::VR64RegClass;
2472 break;
2473 case MVT::f32: case MVT::f64:
2474 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2475 case MVT::v4f32: case MVT::v2f64:
2476 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2477 case MVT::v8f32: case MVT::v4f64:
2478 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2479 case MVT::v16f32: case MVT::v8f64:
2480 RRC = &X86::VR128XRegClass;
2481 break;
2482 }
2483 return std::make_pair(RRC, Cost);
2484}
2485
2486unsigned X86TargetLowering::getAddressSpace() const {
2487 if (Subtarget.is64Bit())
2488 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2489 return 256;
2490}
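// (Aside, not in the original source: on x86, LLVM reserves address spaces
// 256, 257 and 258 for %gs-, %fs- and %ss-relative addressing, so this
// returns 257 (%fs) for 64-bit non-kernel code and 256 (%gs) otherwise,
// matching the %fs:0x28 / %gs:0x28 / %gs:0x14 guard offsets used below.)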
2491
2492static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2493 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2494 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2495}
2496
2497static Constant* SegmentOffset(IRBuilderBase &IRB,
2498 int Offset, unsigned AddressSpace) {
2499 return ConstantExpr::getIntToPtr(
2500 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2501 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2502}
2503
2504Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
2505 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2506 // tcbhead_t; use it instead of the usual global variable (see
2507 // sysdeps/{i386,x86_64}/nptl/tls.h)
2508 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2509 if (Subtarget.isTargetFuchsia()) {
2510 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2511 return SegmentOffset(IRB, 0x10, getAddressSpace());
2512 } else {
2513 unsigned AddressSpace = getAddressSpace();
2514 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
2515 // In particular, some users may customize the base register and offset.
2516 int Offset = M->getStackProtectorGuardOffset();
2517 // If -stack-protector-guard-offset is not given, the default is:
2518 // %fs:0x28, unless we're using a Kernel code model, in which case
2519 // it's %gs:0x28. gs:0x14 on i386.
2520 if (Offset == INT_MAX)
2521 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2522
2523 StringRef GuardReg = M->getStackProtectorGuardReg();
2524 if (GuardReg == "fs")
2525 AddressSpace = X86AS::FS;
2526 else if (GuardReg == "gs")
2527 AddressSpace = X86AS::GS;
2528 return SegmentOffset(IRB, Offset, AddressSpace);
2529 }
2530 }
2531 return TargetLowering::getIRStackGuard(IRB);
2532}
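// Illustrative aside, not part of X86ISelLowering.cpp: a hypothetical,
// self-contained model of the slot selection above. All names here are
// invented; the real code consults the Module and Subtarget instead.
#include <climits>
#include <string>

struct GuardSlot {
  int Offset;          // byte offset within the segment
  unsigned AddrSpace;  // 256 == %gs, 257 == %fs in LLVM's x86 convention
};

GuardSlot pickGuardSlot(bool Is64Bit, bool IsFuchsia, int OverrideOffset,
                        const std::string &OverrideReg, unsigned DefaultAS) {
  if (IsFuchsia)
    return {0x10, DefaultAS};        // ZX_TLS_STACK_GUARD_OFFSET
  int Offset = OverrideOffset;
  if (Offset == INT_MAX)             // no -stack-protector-guard-offset given
    Offset = Is64Bit ? 0x28 : 0x14;  // tcbhead_t stack guard slot
  unsigned AS = DefaultAS;
  if (OverrideReg == "fs")
    AS = 257;
  else if (OverrideReg == "gs")
    AS = 256;
  return {Offset, AS};
}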
2533
2534void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2535 // MSVC CRT provides functionalities for stack protection.
2536 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2537 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2538 // MSVC CRT has a global variable holding security cookie.
2539 M.getOrInsertGlobal("__security_cookie",
2540 Type::getInt8PtrTy(M.getContext()));
2541
2542 // MSVC CRT has a function to validate security cookie.
2543 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2544 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2545 Type::getInt8PtrTy(M.getContext()));
2546 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2547 F->setCallingConv(CallingConv::X86_FastCall);
2548 F->addAttribute(1, Attribute::AttrKind::InReg);
2549 }
2550 return;
2551 }
2552
2553 StringRef GuardMode = M.getStackProtectorGuard();
2554
2555 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2556 if ((GuardMode == "tls" || GuardMode.empty()) &&
2557 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2558 return;
2559 TargetLowering::insertSSPDeclarations(M);
2560}
2561
2562Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2563 // MSVC CRT has a global variable holding security cookie.
2564 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2565 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2566 return M.getGlobalVariable("__security_cookie");
2567 }
2568 return TargetLowering::getSDagStackGuard(M);
2569}
2570
2571Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2572 // MSVC CRT has a function to validate security cookie.
2573 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2574 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2575 return M.getFunction("__security_check_cookie");
2576 }
2577 return TargetLowering::getSSPStackGuardCheck(M);
2578}
2579
2580Value *
2581X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
2582 if (Subtarget.getTargetTriple().isOSContiki())
2583 return getDefaultSafeStackPointerLocation(IRB, false);
2584
2585 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2586 // definition of TLS_SLOT_SAFESTACK in
2587 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2588 if (Subtarget.isTargetAndroid()) {
2589 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2590 // %gs:0x24 on i386
2591 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2592 return SegmentOffset(IRB, Offset, getAddressSpace());
2593 }
2594
2595 // Fuchsia is similar.
2596 if (Subtarget.isTargetFuchsia()) {
2597 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2598 return SegmentOffset(IRB, 0x18, getAddressSpace());
2599 }
2600
2601 return TargetLowering::getSafeStackPointerLocation(IRB);
2602}
2603
2604//===----------------------------------------------------------------------===//
2605// Return Value Calling Convention Implementation
2606//===----------------------------------------------------------------------===//
2607
2608bool X86TargetLowering::CanLowerReturn(
2609 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2610 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2611 SmallVector<CCValAssign, 16> RVLocs;
2612 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2613 return CCInfo.CheckReturn(Outs, RetCC_X86);
2614}
2615
2616const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2617 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2618 return ScratchRegs;
2619}
2620
2621 /// Lowers mask values (v*i1) to the local register values.
2622 /// \returns the DAG node after lowering to the register type.
2623static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2624 const SDLoc &Dl, SelectionDAG &DAG) {
2625 EVT ValVT = ValArg.getValueType();
2626
2627 if (ValVT == MVT::v1i1)
2628 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2629 DAG.getIntPtrConstant(0, Dl));
2630
2631 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2632 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2633 // Two stage lowering might be required
2634 // bitcast: v8i1 -> i8 / v16i1 -> i16
2635 // anyextend: i8 -> i32 / i16 -> i32
2636 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2637 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2638 if (ValLoc == MVT::i32)
2639 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2640 return ValToCopy;
2641 }
2642
2643 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2644 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2645 // One stage lowering is required
2646 // bitcast: v32i1 -> i32 / v64i1 -> i64
2647 return DAG.getBitcast(ValLoc, ValArg);
2648 }
2649
2650 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2651}
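// Illustrative aside, not part of X86ISelLowering.cpp: the scalar analogue of
// the v8i1 path above - pack eight i1 lanes into an i8, then widen it to an
// i32 location (the ANY_EXTEND's undefined upper bits are simply zero-filled
// here). The function name is invented for the example.
#include <array>
#include <cstdint>

uint32_t packV8i1ToI32(const std::array<bool, 8> &Mask) {
  uint8_t Bits = 0;
  for (unsigned I = 0; I < 8; ++I)
    Bits |= static_cast<uint8_t>(Mask[I]) << I;  // bitcast: v8i1 -> i8
  return Bits;                                   // extend:  i8 -> i32
}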
2652
2653 /// Breaks a v64i1 value into two registers and adds the new nodes to the DAG.
2654static void Passv64i1ArgInRegs(
2655 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2656 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
2657 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2658 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
2659 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2660 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
2661 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2662 "The value should reside in two registers");
2663
2664 // Before splitting the value we cast it to i64
2665 Arg = DAG.getBitcast(MVT::i64, Arg);
2666
2667 // Splitting the value into two i32 types
2668 SDValue Lo, Hi;
2669 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2670 DAG.getConstant(0, Dl, MVT::i32));
2671 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2672 DAG.getConstant(1, Dl, MVT::i32));
2673
2674 // Attach the two i32 types into corresponding registers
2675 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2676 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2677}
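// Illustrative aside, not part of X86ISelLowering.cpp: the scalar analogue of
// the two EXTRACT_ELEMENT nodes above - element 0 is the low 32 bits of the
// bitcast i64, element 1 the high 32 bits. The function name is invented.
#include <cstdint>
#include <utility>

std::pair<uint32_t, uint32_t> splitMask64(uint64_t Mask) {
  uint32_t Lo = static_cast<uint32_t>(Mask);        // element 0 -> first reg
  uint32_t Hi = static_cast<uint32_t>(Mask >> 32);  // element 1 -> second reg
  return {Lo, Hi};
}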
2678
2679SDValue
2680X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2681 bool isVarArg,
2682 const SmallVectorImpl<ISD::OutputArg> &Outs,
2683 const SmallVectorImpl<SDValue> &OutVals,
2684 const SDLoc &dl, SelectionDAG &DAG) const {
2685 MachineFunction &MF = DAG.getMachineFunction();
2686 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2687
2688 // In some cases we need to disable registers from the default CSR list.
2689 // For example, when they are used for argument passing.
2690 bool ShouldDisableCalleeSavedRegister =
2691 CallConv == CallingConv::X86_RegCall ||
2692 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2693
2694 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2695 report_fatal_error("X86 interrupts may not return any value");
2696
2697 SmallVector<CCValAssign, 16> RVLocs;
2698 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2699 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2700
2701 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
2702 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2703 ++I, ++OutsIndex) {
2704 CCValAssign &VA = RVLocs[I];
2705 assert(VA.isRegLoc() && "Can only return in registers!");
2706
2707 // Add the register to the CalleeSaveDisableRegs list.
2708 if (ShouldDisableCalleeSavedRegister)
2709 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2710
2711 SDValue ValToCopy = OutVals[OutsIndex];
2712 EVT ValVT = ValToCopy.getValueType();
2713
2714 // Promote values to the appropriate types.
2715 if (VA.getLocInfo() == CCValAssign::SExt)
2716 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2717 else if (VA.getLocInfo() == CCValAssign::ZExt)
2718 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2719 else if (VA.getLocInfo() == CCValAssign::AExt) {
2720 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2721 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2722 else
2723 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2724 }
2725 else if (VA.getLocInfo() == CCValAssign::BCvt)
2726 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2727
2728 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2729 "Unexpected FP-extend for return value.");
2730
2731 // Report an error if we have attempted to return a value via an XMM
2732 // register and SSE was disabled.
2733 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
2734 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2735 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2736 } else if (!Subtarget.hasSSE2() &&
2737 X86::FR64XRegClass.contains(VA.getLocReg()) &&
2738 ValVT == MVT::f64) {
2739 // When returning a double via an XMM register, report an error if SSE2 is
2740 // not enabled.
2741 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2742 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2743 }
2744
2745 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2746 // the RET instruction and handled by the FP Stackifier.
2747 if (VA.getLocReg() == X86::FP0 ||
2748 VA.getLocReg() == X86::FP1) {
2749 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2750 // change the value to the FP stack register class.
2751 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2752 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2753 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2754 // Don't emit a copytoreg.
2755 continue;
2756 }
2757
2758 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2759 // which is returned in RAX / RDX.
2760 if (Subtarget.is64Bit()) {
2761 if (ValVT == MVT::x86mmx) {
2762 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2763 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2764 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2765 ValToCopy);
2766 // If we don't have SSE2 available, convert to v4f32 so the generated
2767 // register is legal.
2768 if (!Subtarget.hasSSE2())
2769 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2770 }
2771 }
2772 }
2773
2774 if (VA.needsCustom()) {
2775 assert(VA.getValVT() == MVT::v64i1 &&
2776 "Currently the only custom case is when we split v64i1 to 2 regs");
2777
2778 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
2779 Subtarget);
2780
2781 // Add the second register to the CalleeSaveDisableRegs list.
2782 if (ShouldDisableCalleeSavedRegister)
2783 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2784 } else {
2785 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2786 }
2787 }
2788
2789 SDValue Flag;
2790 SmallVector<SDValue, 6> RetOps;
2791 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2792 // Operand #1 = Bytes To Pop
2793 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2794 MVT::i32));
2795
2796 // Copy the result values into the output registers.
2797 for (auto &RetVal : RetVals) {
2798 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
2799 RetOps.push_back(RetVal.second);
2800 continue; // Don't emit a copytoreg.
2801 }
2802
2803 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
2804 Flag = Chain.getValue(1);
2805 RetOps.push_back(
2806 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
2807 }
2808
2809 // Swift calling convention does not require we copy the sret argument
2810 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2811
2812 // All x86 ABIs require that for returning structs by value we copy
2813 // the sret argument into %rax/%eax (depending on ABI) for the return.
2814 // We saved the argument into a virtual register in the entry block,
2815 // so now we copy the value out and into %rax/%eax.
2816 //
2817 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2818 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2819 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2820 // either case FuncInfo->setSRetReturnReg() will have been called.
2821 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
2822 // When we have both sret and another return value, we should use the
2823 // original Chain stored in RetOps[0], instead of the current Chain updated
2824 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2825
2826 // For the case of sret and another return value, we have
2827 // Chain_0 at the function entry
2828 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2829 // If we use Chain_1 in getCopyFromReg, we will have
2830 // Val = getCopyFromReg(Chain_1)
2831 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2832
2833 // getCopyToReg(Chain_0) will be glued together with
2834 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2835 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2836 // Data dependency from Unit B to Unit A due to usage of Val in
2837 // getCopyToReg(Chain_1, Val)
2838 // Chain dependency from Unit A to Unit B
2839
2840 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2841 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2842 getPointerTy(MF.getDataLayout()));
2843
2844 Register RetValReg
2845 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2846 X86::RAX : X86::EAX;
2847 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2848 Flag = Chain.getValue(1);
2849
2850 // RAX/EAX now acts like a return value.
2851 RetOps.push_back(
2852 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2853
2854 // Add the returned register to the CalleeSaveDisableRegs list.
2855 if (ShouldDisableCalleeSavedRegister)
2856 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2857 }
2858
2859 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2860 const MCPhysReg *I =
2861 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2862 if (I) {
2863 for (; *I; ++I) {
2864 if (X86::GR64RegClass.contains(*I))
2865 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2866 else
2867 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2867)
;
2868 }
2869 }
2870
2871 RetOps[0] = Chain; // Update chain.
2872
2873 // Add the flag if we have it.
2874 if (Flag.getNode())
2875 RetOps.push_back(Flag);
2876
2877 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2878 if (CallConv == CallingConv::X86_INTR)
2879 opcode = X86ISD::IRET;
2880 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2881}
2882
2883bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2884 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2885 return false;
2886
2887 SDValue TCChain = Chain;
2888 SDNode *Copy = *N->use_begin();
2889 if (Copy->getOpcode() == ISD::CopyToReg) {
2890 // If the copy has a glue operand, we conservatively assume it isn't safe to
2891 // perform a tail call.
2892 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2893 return false;
2894 TCChain = Copy->getOperand(0);
2895 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2896 return false;
2897
2898 bool HasRet = false;
2899 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2900 UI != UE; ++UI) {
2901 if (UI->getOpcode() != X86ISD::RET_FLAG)
2902 return false;
2903 // If we are returning more than one value, we can definitely
2904 // not make a tail call (see PR19530).
2905 if (UI->getNumOperands() > 4)
2906 return false;
2907 if (UI->getNumOperands() == 4 &&
2908 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2909 return false;
2910 HasRet = true;
2911 }
2912
2913 if (!HasRet)
2914 return false;
2915
2916 Chain = TCChain;
2917 return true;
2918}
2919
2920EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2921 ISD::NodeType ExtendKind) const {
2922 MVT ReturnMVT = MVT::i32;
2923
2924 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2925 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2926 // The ABI does not require i1, i8 or i16 to be extended.
2927 //
2928 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2929 // always extending i8/i16 return values, so keep doing that for now.
2930 // (PR26665).
2931 ReturnMVT = MVT::i8;
2932 }
2933
2934 EVT MinVT = getRegisterType(Context, ReturnMVT);
2935 return VT.bitsLT(MinVT) ? MinVT : VT;
2936}
2937
2938/// Reads two 32 bit registers and creates a 64 bit mask value.
2939 /// \param VA The current 32 bit value that needs to be assigned.
2940 /// \param NextVA The next 32 bit value that needs to be assigned.
2941 /// \param Root The parent DAG node.
2942 /// \param [in,out] InFlag Represents the SDValue in the parent DAG node used
2943 /// for glue purposes. In case the DAG is already using a
2944 /// physical register instead of a virtual one, we should glue
2945 /// our new SDValue to the InFlag SDValue.
2946 /// \return a new 64 bit SDValue.
2947static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2948 SDValue &Root, SelectionDAG &DAG,
2949 const SDLoc &Dl, const X86Subtarget &Subtarget,
2950 SDValue *InFlag = nullptr) {
2951 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
2952 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2953 assert(VA.getValVT() == MVT::v64i1 &&
2954 "Expecting first location of 64 bit width type");
2955 assert(NextVA.getValVT() == VA.getValVT() &&
2956 "The locations should have the same type");
2957 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2958 "The values should reside in two registers");
2959
2960 SDValue Lo, Hi;
2961 SDValue ArgValueLo, ArgValueHi;
2962
2963 MachineFunction &MF = DAG.getMachineFunction();
2964 const TargetRegisterClass *RC = &X86::GR32RegClass;
2965
2966 // Read a 32 bit value from the registers.
2967 if (nullptr == InFlag) {
2968 // When no physical register is present,
2969 // create an intermediate virtual register.
2970 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
2971 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2972 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2973 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2974 } else {
2975 // When a physical register is available read the value from it and glue
2976 // the reads together.
2977 ArgValueLo =
2978 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2979 *InFlag = ArgValueLo.getValue(2);
2980 ArgValueHi =
2981 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2982 *InFlag = ArgValueHi.getValue(2);
2983 }
2984
2985 // Convert the i32 type into v32i1 type.
2986 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2987
2988 // Convert the i32 type into v32i1 type.
2989 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2990
2991 // Concatenate the two values together.
2992 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2993}
2994
2995/// The function will lower a register of various sizes (8/16/32/64)
2996/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2997 /// \returns a DAG node containing the operand after lowering to mask type.
2998static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2999 const EVT &ValLoc, const SDLoc &Dl,
3000 SelectionDAG &DAG) {
3001 SDValue ValReturned = ValArg;
3002
3003 if (ValVT == MVT::v1i1)
3004 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3005
3006 if (ValVT == MVT::v64i1) {
3007 // On a 32 bit machine, this case is handled by getv64i1Argument.
3008 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
3009 // On a 64 bit machine there is no need to truncate the value, only bitcast it.
3010 } else {
3011 MVT maskLen;
3012 switch (ValVT.getSimpleVT().SimpleTy) {
3013 case MVT::v8i1:
3014 maskLen = MVT::i8;
3015 break;
3016 case MVT::v16i1:
3017 maskLen = MVT::i16;
3018 break;
3019 case MVT::v32i1:
3020 maskLen = MVT::i32;
3021 break;
3022 default:
3023 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3023)
;
3024 }
3025
3026 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3027 }
3028 return DAG.getBitcast(ValVT, ValReturned);
3029}
3030
3031/// Lower the result values of a call into the
3032/// appropriate copies out of appropriate physical registers.
3033///
3034SDValue X86TargetLowering::LowerCallResult(
3035 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3036 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3037 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3038 uint32_t *RegMask) const {
3039
3040 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3041 // Assign locations to each value returned by this call.
3042 SmallVector<CCValAssign, 16> RVLocs;
3043 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3044 *DAG.getContext());
3045 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3046
3047 // Copy all of the result registers out of their specified physreg.
3048 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3049 ++I, ++InsIndex) {
3050 CCValAssign &VA = RVLocs[I];
3051 EVT CopyVT = VA.getLocVT();
3052
3053 // In some calling conventions we need to remove the used registers
3054 // from the register mask.
3055 if (RegMask) {
3056 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3057 SubRegs.isValid(); ++SubRegs)
3058 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3059 }
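// (Aside, not in the original source: RegMask is LLVM's call-preserved bit
// vector, one bit per physical register, with a set bit meaning "preserved
// across the call". Clearing bit (*SubRegs % 32) of word (*SubRegs / 32)
// removes the result register and each of its sub-registers from the
// preserved set, so later passes treat them as defined by the call.)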
3060
3061 // Report an error if there was an attempt to return FP values via XMM
3062 // registers.
3063 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3064 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3065 if (VA.getLocReg() == X86::XMM1)
3066 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3067 else
3068 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3069 } else if (!Subtarget.hasSSE2() &&
3070 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3071 CopyVT == MVT::f64) {
3072 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3073 if (VA.getLocReg() == X86::XMM1)
3074 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3075 else
3076 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3077 }
3078
3079 // If we prefer to use the value in xmm registers, copy it out as f80 and
3080 // use a truncate to move it from fp stack reg to xmm reg.
3081 bool RoundAfterCopy = false;
3082 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3083 isScalarFPTypeInSSEReg(VA.getValVT())) {
3084 if (!Subtarget.hasX87())
3085 report_fatal_error("X87 register return with X87 disabled");
3086 CopyVT = MVT::f80;
3087 RoundAfterCopy = (CopyVT != VA.getLocVT());
3088 }
3089
3090 SDValue Val;
3091 if (VA.needsCustom()) {
3092 assert(VA.getValVT() == MVT::v64i1 &&
3093 "Currently the only custom case is when we split v64i1 to 2 regs");
3094 Val =
3095 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3096 } else {
3097 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3098 .getValue(1);
3099 Val = Chain.getValue(0);
3100 InFlag = Chain.getValue(2);
3101 }
3102
3103 if (RoundAfterCopy)
3104 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3105 // This truncation won't change the value.
3106 DAG.getIntPtrConstant(1, dl));
3107
3108 if (VA.isExtInLoc()) {
3109 if (VA.getValVT().isVector() &&
3110 VA.getValVT().getScalarType() == MVT::i1 &&
3111 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3112 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3113 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3114 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3115 } else
3116 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3117 }
3118
3119 if (VA.getLocInfo() == CCValAssign::BCvt)
3120 Val = DAG.getBitcast(VA.getValVT(), Val);
3121
3122 InVals.push_back(Val);
3123 }
3124
3125 return Chain;
3126}
3127
3128//===----------------------------------------------------------------------===//
3129// C & StdCall & Fast Calling Convention implementation
3130//===----------------------------------------------------------------------===//
3131 // The StdCall calling convention seems to be the standard for many Windows
3132 // API routines. It differs from the C calling convention just a little: the
3133 // callee cleans up the stack, not the caller. Symbols are also decorated in
3134 // some fancy way :) It doesn't support any vector arguments.
3135// For info on fast calling convention see Fast Calling Convention (tail call)
3136// implementation LowerX86_32FastCCCallTo.
3137
3138/// CallIsStructReturn - Determines whether a call uses struct return
3139/// semantics.
3140enum StructReturnType {
3141 NotStructReturn,
3142 RegStructReturn,
3143 StackStructReturn
3144};
3145static StructReturnType
3146callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
3147 if (Outs.empty())
3148 return NotStructReturn;
3149
3150 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
3151 if (!Flags.isSRet())
3152 return NotStructReturn;
3153 if (Flags.isInReg() || IsMCU)
3154 return RegStructReturn;
3155 return StackStructReturn;
3156}
3157
3158/// Determines whether a function uses struct return semantics.
3159static StructReturnType
3160argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
3161 if (Ins.empty())
3162 return NotStructReturn;
3163
3164 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
3165 if (!Flags.isSRet())
3166 return NotStructReturn;
3167 if (Flags.isInReg() || IsMCU)
3168 return RegStructReturn;
3169 return StackStructReturn;
3170}
3171
3172/// Make a copy of an aggregate at address specified by "Src" to address
3173/// "Dst" with size and alignment information specified by the specific
3174/// parameter attribute. The copy will be passed as a byval function parameter.
3175static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3176 SDValue Chain, ISD::ArgFlagsTy Flags,
3177 SelectionDAG &DAG, const SDLoc &dl) {
3178 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3179
3180 return DAG.getMemcpy(
3181 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3182 /*isVolatile*/ false, /*AlwaysInline=*/true,
3183 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3184}
3185
3186/// Return true if the calling convention is one that we can guarantee TCO for.
3187static bool canGuaranteeTCO(CallingConv::ID CC) {
3188 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3189 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3190 CC == CallingConv::HHVM || CC == CallingConv::Tail ||
3191 CC == CallingConv::SwiftTail);
3192}
3193
3194/// Return true if we might ever do TCO for calls with this calling convention.
3195static bool mayTailCallThisCC(CallingConv::ID CC) {
3196 switch (CC) {
3197 // C calling conventions:
3198 case CallingConv::C:
3199 case CallingConv::Win64:
3200 case CallingConv::X86_64_SysV:
3201 // Callee pop conventions:
3202 case CallingConv::X86_ThisCall:
3203 case CallingConv::X86_StdCall:
3204 case CallingConv::X86_VectorCall:
3205 case CallingConv::X86_FastCall:
3206 // Swift:
3207 case CallingConv::Swift:
3208 return true;
3209 default:
3210 return canGuaranteeTCO(CC);
3211 }
3212}
3213
3214/// Return true if the function is being made into a tailcall target by
3215/// changing its ABI.
3216static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3217 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
3218 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
3219}
3220
3221bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3222 if (!CI->isTailCall())
3223 return false;
3224
3225 CallingConv::ID CalleeCC = CI->getCallingConv();
3226 if (!mayTailCallThisCC(CalleeCC))
3227 return false;
3228
3229 return true;
3230}
3231
3232SDValue
3233X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3234 const SmallVectorImpl<ISD::InputArg> &Ins,
3235 const SDLoc &dl, SelectionDAG &DAG,
3236 const CCValAssign &VA,
3237 MachineFrameInfo &MFI, unsigned i) const {
3238 // Create the nodes corresponding to a load from this parameter slot.
3239 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3240 bool AlwaysUseMutable = shouldGuaranteeTCO(
3241 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3242 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3243 EVT ValVT;
3244 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3245
3246 // If value is passed by pointer we have address passed instead of the value
3247 // itself. No need to extend if the mask value and location share the same
3248 // absolute size.
3249 bool ExtendedInMem =
3250 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3251 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3252
3253 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3254 ValVT = VA.getLocVT();
3255 else
3256 ValVT = VA.getValVT();
3257
3258 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3259 // changed with more analysis.
3260 // In case of tail call optimization, mark all arguments mutable, since they
3261 // could be overwritten by the lowering of arguments in case of a tail call.
3262 if (Flags.isByVal()) {
3263 unsigned Bytes = Flags.getByValSize();
3264 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3265
3266 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3267 // can be improved with deeper analysis.
3268 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3269 /*isAliased=*/true);
3270 return DAG.getFrameIndex(FI, PtrVT);
3271 }
3272
3273 EVT ArgVT = Ins[i].ArgVT;
3274
3275 // If this is a vector that has been split into multiple parts, and the
3276 // scalar size of the parts doesn't match the vector element size, then we can't
3277 // elide the copy. The parts will have padding between them instead of being
3278 // packed like a vector.
3279 bool ScalarizedAndExtendedVector =
3280 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3281 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3282
3283 // This is an argument in memory. We might be able to perform copy elision.
3284 // If the argument is passed directly in memory without any extension, then we
3285 // can perform copy elision. Large vector types, for example, may be passed
3286 // indirectly by pointer.
3287 if (Flags.isCopyElisionCandidate() &&
3288 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3289 !ScalarizedAndExtendedVector) {
3290 SDValue PartAddr;
3291 if (Ins[i].PartOffset == 0) {
3292 // If this is a one-part value or the first part of a multi-part value,
3293 // create a stack object for the entire argument value type and return a
3294 // load from our portion of it. This assumes that if the first part of an
3295 // argument is in memory, the rest will also be in memory.
3296 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3297 /*IsImmutable=*/false);
3298 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3299 return DAG.getLoad(
3300 ValVT, dl, Chain, PartAddr,
3301 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3302 } else {
3303 // This is not the first piece of an argument in memory. See if there is
3304 // already a fixed stack object including this offset. If so, assume it
3305 // was created by the PartOffset == 0 branch above and create a load from
3306 // the appropriate offset into it.
3307 int64_t PartBegin = VA.getLocMemOffset();
3308 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3309 int FI = MFI.getObjectIndexBegin();
3310 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3311 int64_t ObjBegin = MFI.getObjectOffset(FI);
3312 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3313 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3314 break;
3315 }
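// (Aside, not in the original source: the loop above scans the fixed stack
// objects for one whose byte range [ObjBegin, ObjEnd) fully covers this
// part's range [PartBegin, PartEnd); if such an object exists, the part is
// loaded directly from it at Ins[i].PartOffset instead of being copied.)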
3316 if (MFI.isFixedObjectIndex(FI)) {
3317 SDValue Addr =
3318 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3319 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3320 return DAG.getLoad(
3321 ValVT, dl, Chain, Addr,
3322 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3323 Ins[i].PartOffset));
3324 }
3325 }
3326 }
3327
3328 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3329 VA.getLocMemOffset(), isImmutable);
3330
3331 // Set SExt or ZExt flag.
3332 if (VA.getLocInfo() == CCValAssign::ZExt) {
3333 MFI.setObjectZExt(FI, true);
3334 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3335 MFI.setObjectSExt(FI, true);
3336 }
3337
3338 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3339 SDValue Val = DAG.getLoad(
3340 ValVT, dl, Chain, FIN,
3341 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3342 return ExtendedInMem
3343 ? (VA.getValVT().isVector()
3344 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3345 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3346 : Val;
3347}
3348
3349// FIXME: Get this from tablegen.
3350static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3351 const X86Subtarget &Subtarget) {
3352 assert(Subtarget.is64Bit());
3353
3354 if (Subtarget.isCallingConvWin64(CallConv)) {
3355 static const MCPhysReg GPR64ArgRegsWin64[] = {
3356 X86::RCX, X86::RDX, X86::R8, X86::R9
3357 };
3358 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3359 }
3360
3361 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3362 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3363 };
3364 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3365}
3366
3367// FIXME: Get this from tablegen.
3368static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3369 CallingConv::ID CallConv,
3370 const X86Subtarget &Subtarget) {
3371 assert(Subtarget.is64Bit());
3372 if (Subtarget.isCallingConvWin64(CallConv)) {
3373 // The XMM registers which might contain var arg parameters are shadowed
3374 // in their paired GPR. So we only need to save the GPR to their home
3375 // slots.
3376 // TODO: __vectorcall will change this.
3377 return None;
3378 }
3379
3380 bool isSoftFloat = Subtarget.useSoftFloat();
3381 if (isSoftFloat || !Subtarget.hasSSE1())
3382 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3383 // registers.
3384 return None;
3385
3386 static const MCPhysReg XMMArgRegs64Bit[] = {
3387 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3388 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3389 };
3390 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3391}
3392
3393#ifndef NDEBUG
3394static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3395 return llvm::is_sorted(
3396 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3397 return A.getValNo() < B.getValNo();
3398 });
3399}
3400#endif
3401
3402namespace {
3403 /// This is a helper class for lowering variable argument parameters.
3404class VarArgsLoweringHelper {
3405public:
3406 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3407 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3408 CallingConv::ID CallConv, CCState &CCInfo)
3409 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3410 TheMachineFunction(DAG.getMachineFunction()),
3411 TheFunction(TheMachineFunction.getFunction()),
3412 FrameInfo(TheMachineFunction.getFrameInfo()),
3413 FrameLowering(*Subtarget.getFrameLowering()),
3414 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3415 CCInfo(CCInfo) {}
3416
3417 // Lower variable argument parameters.
3418 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3419
3420private:
3421 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3422
3423 void forwardMustTailParameters(SDValue &Chain);
3424
3425 bool is64Bit() const { return Subtarget.is64Bit(); }
3426 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3427
3428 X86MachineFunctionInfo *FuncInfo;
3429 const SDLoc &DL;
3430 SelectionDAG &DAG;
3431 const X86Subtarget &Subtarget;
3432 MachineFunction &TheMachineFunction;
3433 const Function &TheFunction;
3434 MachineFrameInfo &FrameInfo;
3435 const TargetFrameLowering &FrameLowering;
3436 const TargetLowering &TargLowering;
3437 CallingConv::ID CallConv;
3438 CCState &CCInfo;
3439};
3440} // namespace
3441
3442void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3443 SDValue &Chain, unsigned StackSize) {
3444 // If the function takes variable number of arguments, make a frame index for
3445 // the start of the first vararg value... for expansion of llvm.va_start. We
3446 // can skip this if there are no va_start calls.
3447 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3448 CallConv != CallingConv::X86_ThisCall)) {
3449 FuncInfo->setVarArgsFrameIndex(
3450 FrameInfo.CreateFixedObject(1, StackSize, true));
3451 }
3452
3453 // 64-bit calling conventions support varargs and register parameters, so we
3454 // have to do extra work to spill them in the prologue.
3455 if (is64Bit()) {
3456 // Find the first unallocated argument registers.
3457 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3458 ArrayRef<MCPhysReg> ArgXMMs =
3459 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3460 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3461 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3462
3463 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3464 "SSE register cannot be used when SSE is disabled!");
3465
3466 if (isWin64()) {
3467 // Get to the caller-allocated home save location. Add 8 to account
3468 // for the return address.
3469 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3470 FuncInfo->setRegSaveFrameIndex(
3471 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3472 // Fixup to set vararg frame on shadow area (4 x i64).
3473 if (NumIntRegs < 4)
3474 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3475 } else {
3476 // For X86-64, if there are vararg parameters that are passed via
3477 // registers, then we must store them to their spots on the stack so
3478 // they may be loaded by dereferencing the result of va_next.
3479 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3480 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3481 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3482 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3483 }
3484
3485 SmallVector<SDValue, 6>
3486 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3487 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3488 // keeping live input value
3489 SDValue ALVal; // if applicable keeps SDValue for %al register
3490
3491 // Gather all the live in physical registers.
3492 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3493 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3494 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3495 }
3496 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3497 if (!AvailableXmms.empty()) {
3498 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3499 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3500 for (MCPhysReg Reg : AvailableXmms) {
3501 // FastRegisterAllocator spills virtual registers at basic
3502 // block boundaries. That leads to uses of xmm registers
3503 // outside of the check for %al. Pass physical registers to
3504 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
3505 TheMachineFunction.getRegInfo().addLiveIn(Reg);
3506 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
3507 }
3508 }
3509
3510 // Store the integer parameter registers.
3511 SmallVector<SDValue, 8> MemOps;
3512 SDValue RSFIN =
3513 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3514 TargLowering.getPointerTy(DAG.getDataLayout()));
3515 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3516 for (SDValue Val : LiveGPRs) {
3517 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3518 TargLowering.getPointerTy(DAG.getDataLayout()),
3519 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3520 SDValue Store =
3521 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3522 MachinePointerInfo::getFixedStack(
3523 DAG.getMachineFunction(),
3524 FuncInfo->getRegSaveFrameIndex(), Offset));
3525 MemOps.push_back(Store);
3526 Offset += 8;
3527 }
3528
3529 // Now store the XMM (fp + vector) parameter registers.
3530 if (!LiveXMMRegs.empty()) {
3531 SmallVector<SDValue, 12> SaveXMMOps;
3532 SaveXMMOps.push_back(Chain);
3533 SaveXMMOps.push_back(ALVal);
3534 SaveXMMOps.push_back(
3535 DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
3536 SaveXMMOps.push_back(
3537 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
3538 llvm::append_range(SaveXMMOps, LiveXMMRegs);
3539 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
3540 MVT::Other, SaveXMMOps));
3541 }
3542
3543 if (!MemOps.empty())
3544 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3545 }
3546}
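// Illustrative aside, not part of X86ISelLowering.cpp: a standalone model of
// the non-Win64 register save area sizing above, assuming the SysV x86-64
// layout of 6 integer argument GPRs followed by 8 XMM argument registers.
// The example register counts are invented.
#include <cstdio>

int main() {
  const unsigned NumArgGPRs = 6, NumArgXMMs = 8;
  // Suppose two GPRs and one XMM were already consumed by named parameters.
  unsigned NumIntRegs = 2, NumXMMRegs = 1;
  unsigned GPOffset = NumIntRegs * 8;                        // 16
  unsigned FPOffset = NumArgGPRs * 8 + NumXMMRegs * 16;      // 48 + 16 = 64
  unsigned SaveAreaSize = NumArgGPRs * 8 + NumArgXMMs * 16;  // 48 + 128 = 176
  std::printf("gp_offset=%u fp_offset=%u save_area=%u\n", GPOffset, FPOffset,
              SaveAreaSize);
  return 0;
}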
3547
3548void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3549 // Find the largest legal vector type.
3550 MVT VecVT = MVT::Other;
3551 // FIXME: Only some x86_32 calling conventions support AVX512.
3552 if (Subtarget.useAVX512Regs() &&
3553 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3554 CallConv == CallingConv::Intel_OCL_BI)))
3555 VecVT = MVT::v16f32;
3556 else if (Subtarget.hasAVX())
3557 VecVT = MVT::v8f32;
3558 else if (Subtarget.hasSSE2())
3559 VecVT = MVT::v4f32;
3560
3561 // We forward some GPRs and some vector types.
3562 SmallVector<MVT, 2> RegParmTypes;
3563 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3564 RegParmTypes.push_back(IntVT);
3565 if (VecVT != MVT::Other)
3566 RegParmTypes.push_back(VecVT);
3567
3568 // Compute the set of forwarded registers. The rest are scratch.
3569 SmallVectorImpl<ForwardedRegister> &Forwards =
3570 FuncInfo->getForwardedMustTailRegParms();
3571 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3572
3573 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3574 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3575 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3576 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3577 }
3578
3579 // Copy all forwards from physical to virtual registers.
3580 for (ForwardedRegister &FR : Forwards) {
3581 // FIXME: Can we use a less constrained schedule?
3582 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3583 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
3584 TargLowering.getRegClassFor(FR.VT));
3585 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
3586 }
3587}
3588
3589void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
3590 unsigned StackSize) {
3591 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
3592 // If necessary, it will be set to the correct value later.
3593 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3594 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3595
3596 if (FrameInfo.hasVAStart())
3597 createVarArgAreaAndStoreRegisters(Chain, StackSize);
3598
3599 if (FrameInfo.hasMustTailInVarArgFunc())
3600 forwardMustTailParameters(Chain);
3601}
3602
3603SDValue X86TargetLowering::LowerFormalArguments(
3604 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3605 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3606 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3607 MachineFunction &MF = DAG.getMachineFunction();
3608 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3609
3610 const Function &F = MF.getFunction();
3611 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3612 F.getName() == "main")
3613 FuncInfo->setForceFramePointer(true);
3614
3615 MachineFrameInfo &MFI = MF.getFrameInfo();
3616 bool Is64Bit = Subtarget.is64Bit();
3617 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3618
3619 assert(
3620 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
3621 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
3622
3623 // Assign locations to all of the incoming arguments.
3624 SmallVector<CCValAssign, 16> ArgLocs;
3625 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3626
3627 // Allocate shadow area for Win64.
3628 if (IsWin64)
3629 CCInfo.AllocateStack(32, Align(8));
3630
3631 CCInfo.AnalyzeArguments(Ins, CC_X86);
3632
3633 // In vectorcall calling convention a second pass is required for the HVA
3634 // types.
3635 if (CallingConv::X86_VectorCall == CallConv) {
3636 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3637 }
3638
3639  // The next loop assumes that the locations are in the same order as the
3640 // input arguments.
3641   assert(isSortedByValueNo(ArgLocs) &&
3642          "Argument Location list must be sorted before lowering");
3643
3644 SDValue ArgValue;
3645 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3646 ++I, ++InsIndex) {
3647     assert(InsIndex < Ins.size() && "Invalid Ins index");
3648 CCValAssign &VA = ArgLocs[I];
3649
3650 if (VA.isRegLoc()) {
3651 EVT RegVT = VA.getLocVT();
3652 if (VA.needsCustom()) {
3653         assert(
3654             VA.getValVT() == MVT::v64i1 &&
3655             "Currently the only custom case is when we split v64i1 to 2 regs");
3656
3657 // v64i1 values, in regcall calling convention, that are
3658 // compiled to 32 bit arch, are split up into two registers.
3659 ArgValue =
3660 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3661 } else {
3662 const TargetRegisterClass *RC;
3663 if (RegVT == MVT::i8)
3664 RC = &X86::GR8RegClass;
3665 else if (RegVT == MVT::i16)
3666 RC = &X86::GR16RegClass;
3667 else if (RegVT == MVT::i32)
3668 RC = &X86::GR32RegClass;
3669 else if (Is64Bit && RegVT == MVT::i64)
3670 RC = &X86::GR64RegClass;
3671 else if (RegVT == MVT::f32)
3672 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3673 else if (RegVT == MVT::f64)
3674 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3675 else if (RegVT == MVT::f80)
3676 RC = &X86::RFP80RegClass;
3677 else if (RegVT == MVT::f128)
3678 RC = &X86::VR128RegClass;
3679 else if (RegVT.is512BitVector())
3680 RC = &X86::VR512RegClass;
3681 else if (RegVT.is256BitVector())
3682 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3683 else if (RegVT.is128BitVector())
3684 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3685 else if (RegVT == MVT::x86mmx)
3686 RC = &X86::VR64RegClass;
3687 else if (RegVT == MVT::v1i1)
3688 RC = &X86::VK1RegClass;
3689 else if (RegVT == MVT::v8i1)
3690 RC = &X86::VK8RegClass;
3691 else if (RegVT == MVT::v16i1)
3692 RC = &X86::VK16RegClass;
3693 else if (RegVT == MVT::v32i1)
3694 RC = &X86::VK32RegClass;
3695 else if (RegVT == MVT::v64i1)
3696 RC = &X86::VK64RegClass;
3697 else
3698           llvm_unreachable("Unknown argument type!");
3699
3700 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3701 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3702 }
3703
3704 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3705 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3706 // right size.
3707 if (VA.getLocInfo() == CCValAssign::SExt)
3708 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3709 DAG.getValueType(VA.getValVT()));
3710 else if (VA.getLocInfo() == CCValAssign::ZExt)
3711 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3712 DAG.getValueType(VA.getValVT()));
3713 else if (VA.getLocInfo() == CCValAssign::BCvt)
3714 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3715
3716 if (VA.isExtInLoc()) {
3717 // Handle MMX values passed in XMM regs.
3718 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3719 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3720 else if (VA.getValVT().isVector() &&
3721 VA.getValVT().getScalarType() == MVT::i1 &&
3722 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3723 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3724 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3725 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3726 } else
3727 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3728 }
3729 } else {
3730       assert(VA.isMemLoc());
3731 ArgValue =
3732 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3733 }
3734
3735 // If value is passed via pointer - do a load.
3736 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3737 ArgValue =
3738 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3739
3740 InVals.push_back(ArgValue);
3741 }
3742
3743 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3744 if (Ins[I].Flags.isSwiftAsync()) {
3745 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
3746 if (Subtarget.is64Bit())
3747 X86FI->setHasSwiftAsyncContext(true);
3748 else {
3749 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
3750 X86FI->setSwiftAsyncContextFrameIdx(FI);
3751 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
3752 DAG.getFrameIndex(FI, MVT::i32),
3753 MachinePointerInfo::getFixedStack(MF, FI));
3754 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
3755 }
3756 }
3757
3758 // Swift calling convention does not require we copy the sret argument
3759 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3760 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
3761 continue;
3762
3763 // All x86 ABIs require that for returning structs by value we copy the
3764 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3765 // the argument into a virtual register so that we can access it from the
3766 // return points.
3767 if (Ins[I].Flags.isSRet()) {
3768 Register Reg = FuncInfo->getSRetReturnReg();
3769 if (!Reg) {
3770 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3771 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3772 FuncInfo->setSRetReturnReg(Reg);
3773 }
3774 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3775 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3776 break;
3777 }
3778 }
3779
3780 unsigned StackSize = CCInfo.getNextStackOffset();
3781 // Align stack specially for tail calls.
3782 if (shouldGuaranteeTCO(CallConv,
3783 MF.getTarget().Options.GuaranteedTailCallOpt))
3784 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3785
3786 if (IsVarArg)
3787 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
3788 .lowerVarArgsParameters(Chain, StackSize);
3789
3790 // Some CCs need callee pop.
3791 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
3792 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3793 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3794 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3795 // X86 interrupts must pop the error code (and the alignment padding) if
3796 // present.
3797 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3798 } else {
3799 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3800 // If this is an sret function, the return should pop the hidden pointer.
3801 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3802 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3803 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3804 FuncInfo->setBytesToPopOnReturn(4);
3805 }
3806
3807 if (!Is64Bit) {
3808 // RegSaveFrameIndex is X86-64 only.
3809 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3810 }
3811
3812 FuncInfo->setArgumentStackSize(StackSize);
3813
3814 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3815 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3816 if (Personality == EHPersonality::CoreCLR) {
3817       assert(Is64Bit);
3818 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3819 // that we'd prefer this slot be allocated towards the bottom of the frame
3820 // (i.e. near the stack pointer after allocating the frame). Every
3821 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3822 // offset from the bottom of this and each funclet's frame must be the
3823 // same, so the size of funclets' (mostly empty) frames is dictated by
3824 // how far this slot is from the bottom (since they allocate just enough
3825 // space to accommodate holding this slot at the correct offset).
3826 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
3827 EHInfo->PSPSymFrameIdx = PSPSymFI;
3828 }
3829 }
3830
3831 if (CallConv == CallingConv::X86_RegCall ||
3832 F.hasFnAttribute("no_caller_saved_registers")) {
3833 MachineRegisterInfo &MRI = MF.getRegInfo();
3834 for (std::pair<Register, Register> Pair : MRI.liveins())
3835 MRI.disableCalleeSavedRegister(Pair.first);
3836 }
3837
3838 return Chain;
3839}
3840
3841SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3842 SDValue Arg, const SDLoc &dl,
3843 SelectionDAG &DAG,
3844 const CCValAssign &VA,
3845 ISD::ArgFlagsTy Flags,
3846 bool isByVal) const {
3847 unsigned LocMemOffset = VA.getLocMemOffset();
3848 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3849 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3850 StackPtr, PtrOff);
3851 if (isByVal)
3852 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3853
3854 return DAG.getStore(
3855 Chain, dl, Arg, PtrOff,
3856 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3857}
3858
3859/// Emit a load of return address if tail call
3860/// optimization is performed and it is required.
3861SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3862 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3863 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3864 // Adjust the Return address stack slot.
3865 EVT VT = getPointerTy(DAG.getDataLayout());
3866 OutRetAddr = getReturnAddressFrameIndex(DAG);
3867
3868 // Load the "old" Return address.
3869 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3870 return SDValue(OutRetAddr.getNode(), 1);
3871}
3872
3873/// Emit a store of the return address if tail call
3874/// optimization is performed and it is required (FPDiff!=0).
3875static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3876 SDValue Chain, SDValue RetAddrFrIdx,
3877 EVT PtrVT, unsigned SlotSize,
3878 int FPDiff, const SDLoc &dl) {
3879 // Store the return address to the appropriate stack slot.
3880 if (!FPDiff) return Chain;
3881 // Calculate the new stack slot for the return address.
3882 int NewReturnAddrFI =
3883 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3884 false);
3885 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3886 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3887 MachinePointerInfo::getFixedStack(
3888 DAG.getMachineFunction(), NewReturnAddrFI));
3889 return Chain;
3890}
3891
3892/// Returns a vector_shuffle mask for a movs{s|d} or movd
3893/// operation of the specified width.
3894static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3895 SDValue V2) {
3896 unsigned NumElems = VT.getVectorNumElements();
3897 SmallVector<int, 8> Mask;
3898 Mask.push_back(NumElems);
3899 for (unsigned i = 1; i != NumElems; ++i)
3900 Mask.push_back(i);
3901 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3902}
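For illustration (the type is chosen here, not taken from the surrounding call sites): with VT = MVT::v4f32 the loop builds the mask <4, 1, 2, 3>, so lane 0 of the result is taken from V2 and lanes 1-3 from V1 - the merge that movss performs when the low scalar of one operand is inserted into the other.
//   SDValue Movl = getMOVL(DAG, dl, MVT::v4f32, V1, V2);
//   // shuffle mask {4, 1, 2, 3}  ->  { V2[0], V1[1], V1[2], V1[3] }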
3903
3904SDValue
3905X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3906 SmallVectorImpl<SDValue> &InVals) const {
3907 SelectionDAG &DAG = CLI.DAG;
3908 SDLoc &dl = CLI.DL;
3909 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3910 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3911 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3912 SDValue Chain = CLI.Chain;
3913 SDValue Callee = CLI.Callee;
3914 CallingConv::ID CallConv = CLI.CallConv;
3915 bool &isTailCall = CLI.IsTailCall;
3916 bool isVarArg = CLI.IsVarArg;
3917 const auto *CB = CLI.CB;
3918
3919 MachineFunction &MF = DAG.getMachineFunction();
3920 bool Is64Bit = Subtarget.is64Bit();
3921 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3922 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3923 bool IsSibcall = false;
3924 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
3925 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
3926 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3927 bool HasNCSR = (CB && isa<CallInst>(CB) &&
3928 CB->hasFnAttr("no_caller_saved_registers"));
3929 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
3930 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
3931 const Module *M = MF.getMMI().getModule();
3932 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3933
3934 MachineFunction::CallSiteInfo CSInfo;
3935 if (CallConv == CallingConv::X86_INTR)
3936 report_fatal_error("X86 interrupts may not be called directly");
3937
3938 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
3939 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
3940 // If we are using a GOT, disable tail calls to external symbols with
3941 // default visibility. Tail calling such a symbol requires using a GOT
3942 // relocation, which forces early binding of the symbol. This breaks code
3943 // that require lazy function symbol resolution. Using musttail or
3944 // GuaranteedTailCallOpt will override this.
3945 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3946 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3947 G->getGlobal()->hasDefaultVisibility()))
3948 isTailCall = false;
3949 }
3950
3951
3952 if (isTailCall && !IsMustTail) {
3953 // Check if it's really possible to do a tail call.
3954 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3955 isVarArg, SR != NotStructReturn,
3956 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3957 Outs, OutVals, Ins, DAG);
3958
3959 // Sibcalls are automatically detected tailcalls which do not require
3960 // ABI changes.
3961 if (!IsGuaranteeTCO && isTailCall)
3962 IsSibcall = true;
3963
3964 if (isTailCall)
3965 ++NumTailCalls;
3966 }
3967
3968 if (IsMustTail && !isTailCall)
3969 report_fatal_error("failed to perform tail call elimination on a call "
3970 "site marked musttail");
3971
3972   assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3973          "Var args not supported with calling convention fastcc, ghc or hipe");
3974
3975 // Analyze operands of the call, assigning locations to each operand.
3976 SmallVector<CCValAssign, 16> ArgLocs;
3977 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3978
3979 // Allocate shadow area for Win64.
3980 if (IsWin64)
3981 CCInfo.AllocateStack(32, Align(8));
3982
3983 CCInfo.AnalyzeArguments(Outs, CC_X86);
3984
3985 // In vectorcall calling convention a second pass is required for the HVA
3986 // types.
3987 if (CallingConv::X86_VectorCall == CallConv) {
3988 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3989 }
3990
3991 // Get a count of how many bytes are to be pushed on the stack.
3992 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3993 if (IsSibcall)
3994 // This is a sibcall. The memory operands are available in caller's
3995 // own caller's stack.
3996 NumBytes = 0;
3997 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
3998 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3999
4000 int FPDiff = 0;
4001 if (isTailCall &&
4002 shouldGuaranteeTCO(CallConv,
4003 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4004 // Lower arguments at fp - stackoffset + fpdiff.
4005 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
4006
4007 FPDiff = NumBytesCallerPushed - NumBytes;
4008
4009    // Record the delta by which the return-address stack slot must move,
4010    // keeping the largest (most negative) delta seen so far.
4011 if (FPDiff < X86Info->getTCReturnAddrDelta())
4012 X86Info->setTCReturnAddrDelta(FPDiff);
4013 }
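A small worked instance of the delta above (byte counts are hypothetical): if the caller itself pops 16 bytes on return (NumBytesCallerPushed = 16) while this call needs 32 bytes of arguments (NumBytes = 32), then FPDiff = 16 - 32 = -16, meaning the outgoing arguments extend 16 bytes beyond the caller's own argument area and the return-address slot must be relocated by that amount; TCReturnAddrDelta records the most negative such value seen in the function.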
4014
4015 unsigned NumBytesToPush = NumBytes;
4016 unsigned NumBytesToPop = NumBytes;
4017
4018 // If we have an inalloca argument, all stack space has already been allocated
4019  // for us and will be right at the top of the stack. We don't support multiple
4020 // arguments passed in memory when using inalloca.
4021 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4022 NumBytesToPush = 0;
4023 if (!ArgLocs.back().isMemLoc())
4024 report_fatal_error("cannot use inalloca attribute on a register "
4025 "parameter");
4026 if (ArgLocs.back().getLocMemOffset() != 0)
4027 report_fatal_error("any parameter with the inalloca attribute must be "
4028 "the only memory argument");
4029 } else if (CLI.IsPreallocated) {
4030     assert(ArgLocs.back().isMemLoc() &&
4031            "cannot use preallocated attribute on a register "
4032            "parameter");
4033 SmallVector<size_t, 4> PreallocatedOffsets;
4034 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4035 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4036 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4037 }
4038 }
4039 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4040 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4041 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4042 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4043 NumBytesToPush = 0;
4044 }
4045
4046 if (!IsSibcall && !IsMustTail)
4047 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4048 NumBytes - NumBytesToPush, dl);
4049
4050 SDValue RetAddrFrIdx;
4051 // Load return address for tail calls.
4052 if (isTailCall && FPDiff)
4053 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4054 Is64Bit, FPDiff, dl);
4055
4056 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4057 SmallVector<SDValue, 8> MemOpChains;
4058 SDValue StackPtr;
4059
4060  // The next loop assumes that the locations are in the same order as the
4061 // input arguments.
4062   assert(isSortedByValueNo(ArgLocs) &&
4063          "Argument Location list must be sorted before lowering");
4064
4065 // Walk the register/memloc assignments, inserting copies/loads. In the case
4066  // of tail call optimization arguments are handled later.
4067 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4068 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4069 ++I, ++OutIndex) {
4070     assert(OutIndex < Outs.size() && "Invalid Out index");
4071 // Skip inalloca/preallocated arguments, they have already been written.
4072 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4073 if (Flags.isInAlloca() || Flags.isPreallocated())
4074 continue;
4075
4076 CCValAssign &VA = ArgLocs[I];
4077 EVT RegVT = VA.getLocVT();
4078 SDValue Arg = OutVals[OutIndex];
4079 bool isByVal = Flags.isByVal();
4080
4081 // Promote the value if needed.
4082 switch (VA.getLocInfo()) {
4083     default: llvm_unreachable("Unknown loc info!");
4084 case CCValAssign::Full: break;
4085 case CCValAssign::SExt:
4086 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4087 break;
4088 case CCValAssign::ZExt:
4089 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4090 break;
4091 case CCValAssign::AExt:
4092 if (Arg.getValueType().isVector() &&
4093 Arg.getValueType().getVectorElementType() == MVT::i1)
4094 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4095 else if (RegVT.is128BitVector()) {
4096 // Special case: passing MMX values in XMM registers.
4097 Arg = DAG.getBitcast(MVT::i64, Arg);
4098 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4099 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4100 } else
4101 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4102 break;
4103 case CCValAssign::BCvt:
4104 Arg = DAG.getBitcast(RegVT, Arg);
4105 break;
4106 case CCValAssign::Indirect: {
4107 if (isByVal) {
4108 // Memcpy the argument to a temporary stack slot to prevent
4109 // the caller from seeing any modifications the callee may make
4110 // as guaranteed by the `byval` attribute.
4111 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4112 Flags.getByValSize(),
4113 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4114 SDValue StackSlot =
4115 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4116 Chain =
4117 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4118 // From now on treat this as a regular pointer
4119 Arg = StackSlot;
4120 isByVal = false;
4121 } else {
4122 // Store the argument.
4123 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4124 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4125 Chain = DAG.getStore(
4126 Chain, dl, Arg, SpillSlot,
4127 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4128 Arg = SpillSlot;
4129 }
4130 break;
4131 }
4132 }
4133
4134 if (VA.needsCustom()) {
4135       assert(VA.getValVT() == MVT::v64i1 &&
4136              "Currently the only custom case is when we split v64i1 to 2 regs");
4137 // Split v64i1 value into two registers
4138 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4139 } else if (VA.isRegLoc()) {
4140 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4141 const TargetOptions &Options = DAG.getTarget().Options;
4142 if (Options.EmitCallSiteInfo)
4143 CSInfo.emplace_back(VA.getLocReg(), I);
4144 if (isVarArg && IsWin64) {
4145 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4146 // shadow reg if callee is a varargs function.
4147 Register ShadowReg;
4148 switch (VA.getLocReg()) {
4149 case X86::XMM0: ShadowReg = X86::RCX; break;
4150 case X86::XMM1: ShadowReg = X86::RDX; break;
4151 case X86::XMM2: ShadowReg = X86::R8; break;
4152 case X86::XMM3: ShadowReg = X86::R9; break;
4153 }
4154 if (ShadowReg)
4155 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4156 }
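For illustration (a hypothetical call site): a Win64 variadic call that passes a double as its third argument places the value in XMM2, and the switch above additionally copies it into R8, the integer register for parameter slot 3, since a Win64 varargs callee may read either register for that slot.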
4157 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4158       assert(VA.isMemLoc());
4159 if (!StackPtr.getNode())
4160 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4161 getPointerTy(DAG.getDataLayout()));
4162 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4163 dl, DAG, VA, Flags, isByVal));
4164 }
4165 }
4166
4167 if (!MemOpChains.empty())
4168 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4169
4170 if (Subtarget.isPICStyleGOT()) {
4171 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4172 // GOT pointer (except regcall).
4173 if (!isTailCall) {
4174      // An indirect call with the RegCall calling convention may use up all the
4175      // general registers, so it is not suitable to bind the EBX register for
4176 // GOT address, just let register allocator handle it.
4177 if (CallConv != CallingConv::X86_RegCall)
4178 RegsToPass.push_back(std::make_pair(
4179 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4180 getPointerTy(DAG.getDataLayout()))));
4181 } else {
4182 // If we are tail calling and generating PIC/GOT style code load the
4183 // address of the callee into ECX. The value in ecx is used as target of
4184 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4185 // for tail calls on PIC/GOT architectures. Normally we would just put the
4186 // address of GOT into ebx and then call target@PLT. But for tail calls
4187 // ebx would be restored (since ebx is callee saved) before jumping to the
4188 // target@PLT.
4189
4190 // Note: The actual moving to ECX is done further down.
4191 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4192 if (G && !G->getGlobal()->hasLocalLinkage() &&
4193 G->getGlobal()->hasDefaultVisibility())
4194 Callee = LowerGlobalAddress(Callee, DAG);
4195 else if (isa<ExternalSymbolSDNode>(Callee))
4196 Callee = LowerExternalSymbol(Callee, DAG);
4197 }
4198 }
4199
4200 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
4201 // From AMD64 ABI document:
4202 // For calls that may call functions that use varargs or stdargs
4203 // (prototype-less calls or calls to functions containing ellipsis (...) in
4204 // the declaration) %al is used as hidden argument to specify the number
4205 // of SSE registers used. The contents of %al do not need to match exactly
4206    // the number of registers, but must be an upper bound on the number of SSE
4207 // registers used and is in the range 0 - 8 inclusive.
4208
4209 // Count the number of XMM registers allocated.
4210 static const MCPhysReg XMMArgRegs[] = {
4211 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4212 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4213 };
4214 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4215     assert((Subtarget.hasSSE1() || !NumXMMRegs)
4216            && "SSE registers cannot be used when SSE is disabled");
4217 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4218 DAG.getConstant(NumXMMRegs, dl,
4219 MVT::i8)));
4220 }
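A hedged example of the constant chosen above (the callee is hypothetical): for a SysV x86-64 call such as printf("%f %f", x, y), the two doubles occupy XMM0 and XMM1, getFirstUnallocated returns 2, and AL is loaded with 2 before the call - the familiar 'mov al, 2' that lets a variadic callee decide how many SSE argument registers to spill into its register save area.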
4221
4222 if (isVarArg && IsMustTail) {
4223 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4224 for (const auto &F : Forwards) {
4225 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4226 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4227 }
4228 }
4229
4230 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4231 // don't need this because the eligibility check rejects calls that require
4232 // shuffling arguments passed in memory.
4233 if (!IsSibcall && isTailCall) {
4234 // Force all the incoming stack arguments to be loaded from the stack
4235 // before any new outgoing arguments are stored to the stack, because the
4236 // outgoing stack slots may alias the incoming argument stack slots, and
4237 // the alias isn't otherwise explicit. This is slightly more conservative
4238 // than necessary, because it means that each store effectively depends
4239 // on every argument instead of just those arguments it would clobber.
4240 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4241
4242 SmallVector<SDValue, 8> MemOpChains2;
4243 SDValue FIN;
4244 int FI = 0;
4245 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4246 ++I, ++OutsIndex) {
4247 CCValAssign &VA = ArgLocs[I];
4248
4249 if (VA.isRegLoc()) {
4250 if (VA.needsCustom()) {
4251           assert((CallConv == CallingConv::X86_RegCall) &&
4252                  "Expecting custom case only in regcall calling convention");
4253 // This means that we are in special case where one argument was
4254 // passed through two register locations - Skip the next location
4255 ++I;
4256 }
4257
4258 continue;
4259 }
4260
4261       assert(VA.isMemLoc());
4262 SDValue Arg = OutVals[OutsIndex];
4263 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4264 // Skip inalloca/preallocated arguments. They don't require any work.
4265 if (Flags.isInAlloca() || Flags.isPreallocated())
4266 continue;
4267 // Create frame index.
4268 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4269 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4270 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4271 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4272
4273 if (Flags.isByVal()) {
4274 // Copy relative to framepointer.
4275 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4276 if (!StackPtr.getNode())
4277 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4278 getPointerTy(DAG.getDataLayout()));
4279 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4280 StackPtr, Source);
4281
4282 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4283 ArgChain,
4284 Flags, DAG, dl));
4285 } else {
4286 // Store relative to framepointer.
4287 MemOpChains2.push_back(DAG.getStore(
4288 ArgChain, dl, Arg, FIN,
4289 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4290 }
4291 }
4292
4293 if (!MemOpChains2.empty())
4294 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4295
4296 // Store the return address to the appropriate stack slot.
4297 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4298 getPointerTy(DAG.getDataLayout()),
4299 RegInfo->getSlotSize(), FPDiff, dl);
4300 }
4301
4302 // Build a sequence of copy-to-reg nodes chained together with token chain
4303 // and flag operands which copy the outgoing args into registers.
4304 SDValue InFlag;
4305 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4306 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4307 RegsToPass[i].second, InFlag);
4308 InFlag = Chain.getValue(1);
4309 }
4310
4311 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4312     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
4313 // In the 64-bit large code model, we have to make all calls
4314 // through a register, since the call instruction's 32-bit
4315 // pc-relative offset may not be large enough to hold the whole
4316 // address.
4317 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4318 Callee->getOpcode() == ISD::ExternalSymbol) {
4319 // Lower direct calls to global addresses and external symbols. Setting
4320 // ForCall to true here has the effect of removing WrapperRIP when possible
4321 // to allow direct calls to be selected without first materializing the
4322 // address into a register.
4323 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4324 } else if (Subtarget.isTarget64BitILP32() &&
4325 Callee->getValueType(0) == MVT::i32) {
4326 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4327 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4328 }
4329
4330 // Returns a chain & a flag for retval copy to use.
4331 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4332 SmallVector<SDValue, 8> Ops;
4333
4334 if (!IsSibcall && isTailCall && !IsMustTail) {
4335 Chain = DAG.getCALLSEQ_END(Chain,
4336 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4337 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4338 InFlag = Chain.getValue(1);
4339 }
4340
4341 Ops.push_back(Chain);
4342 Ops.push_back(Callee);
4343
4344 if (isTailCall)
4345 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4346
4347 // Add argument registers to the end of the list so that they are known live
4348 // into the call.
4349 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4350 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4351 RegsToPass[i].second.getValueType()));
4352
4353 // Add a register mask operand representing the call-preserved registers.
4354 const uint32_t *Mask = [&]() {
4355 auto AdaptedCC = CallConv;
4356 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
4357 // use X86_INTR calling convention because it has the same CSR mask
4358 // (same preserved registers).
4359 if (HasNCSR)
4360 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
4361    // If NoCalleeSavedRegisters is requested, then use GHC since it happens
4362 // to use the CSR_NoRegs_RegMask.
4363 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
4364 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
4365 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
4366 }();
4367   assert(Mask && "Missing call preserved mask for calling convention");
4368
4369 // If this is an invoke in a 32-bit function using a funclet-based
4370 // personality, assume the function clobbers all registers. If an exception
4371 // is thrown, the runtime will not restore CSRs.
4372 // FIXME: Model this more precisely so that we can register allocate across
4373 // the normal edge and spill and fill across the exceptional edge.
4374 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4375 const Function &CallerFn = MF.getFunction();
4376 EHPersonality Pers =
4377 CallerFn.hasPersonalityFn()
4378 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4379 : EHPersonality::Unknown;
4380 if (isFuncletEHPersonality(Pers))
4381 Mask = RegInfo->getNoPreservedMask();
4382 }
4383
4384 // Define a new register mask from the existing mask.
4385 uint32_t *RegMask = nullptr;
4386
4387 // In some calling conventions we need to remove the used physical registers
4388 // from the reg mask.
4389 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4390 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4391
4392 // Allocate a new Reg Mask and copy Mask.
4393 RegMask = MF.allocateRegMask();
4394 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4395 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4396
4397 // Make sure all sub registers of the argument registers are reset
4398 // in the RegMask.
4399 for (auto const &RegPair : RegsToPass)
4400 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4401 SubRegs.isValid(); ++SubRegs)
4402 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4403
4404 // Create the RegMask Operand according to our updated mask.
4405 Ops.push_back(DAG.getRegisterMask(RegMask));
4406 } else {
4407 // Create the RegMask Operand according to the static mask.
4408 Ops.push_back(DAG.getRegisterMask(Mask));
4409 }
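To make the mask arithmetic above concrete (the register encoding is hypothetical): an argument register numbered 70 maps to RegMask word 70 / 32 = 2 and bit 70 % 32 = 6; clearing that bit, and likewise the bit of every sub-register visited by MCSubRegIterator, tells the register allocator that the value is clobbered by (not preserved across) this call.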
4410
4411 if (InFlag.getNode())
4412 Ops.push_back(InFlag);
4413
4414 if (isTailCall) {
4415 // We used to do:
4416 //// If this is the first return lowered for this function, add the regs
4417 //// to the liveout set for the function.
4418 // This isn't right, although it's probably harmless on x86; liveouts
4419 // should be computed from returns not tail calls. Consider a void
4420 // function making a tail call to a function returning int.
4421 MF.getFrameInfo().setHasTailCall();
4422 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4423 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4424 return Ret;
4425 }
4426
4427 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4428 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4429 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4430 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
4431 // expanded to the call, directly followed by a special marker sequence and
4432 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
4433     assert(!isTailCall &&
4434            "tail calls cannot be marked with clang.arc.attachedcall");
4435     assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
4436
4437 // Add target constant to select ObjC runtime call just before the call
4438 // target. RuntimeCallType == 0 selects objc_retainAutoreleasedReturnValue,
4439    // RuntimeCallType == 1 selects objc_unsafeClaimAutoreleasedReturnValue when
4440    // expanding the pseudo.
4441 unsigned RuntimeCallType =
4442 objcarc::hasAttachedCallOpBundle(CLI.CB, true) ? 0 : 1;
4443 Ops.insert(Ops.begin() + 1,
4444 DAG.getTargetConstant(RuntimeCallType, dl, MVT::i32));
4445 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
4446 } else {
4447 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4448 }
4449
4450 InFlag = Chain.getValue(1);
4451 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4452 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4453
4454 // Save heapallocsite metadata.
4455 if (CLI.CB)
4456 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4457 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4458
4459 // Create the CALLSEQ_END node.
4460 unsigned NumBytesForCalleeToPop;
4461 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4462 DAG.getTarget().Options.GuaranteedTailCallOpt))
4463 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4464 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4465 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4466 SR == StackStructReturn)
4467 // If this is a call to a struct-return function, the callee
4468 // pops the hidden struct pointer, so we have to push it back.
4469 // This is common for Darwin/X86, Linux & Mingw32 targets.
4470 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4471 NumBytesForCalleeToPop = 4;
4472 else
4473 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4474
4475 // Returns a flag for retval copy to use.
4476 if (!IsSibcall) {
4477 Chain = DAG.getCALLSEQ_END(Chain,
4478 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4479 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4480 true),
4481 InFlag, dl);
4482 InFlag = Chain.getValue(1);
4483 }
4484
4485 // Handle result values, copying them out of physregs into vregs that we
4486 // return.
4487 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4488 InVals, RegMask);
4489}
4490
4491//===----------------------------------------------------------------------===//
4492// Fast Calling Convention (tail call) implementation
4493//===----------------------------------------------------------------------===//
4494
4495// Like stdcall, the callee cleans up the arguments; the convention differs in
4496// that ECX is reserved for storing the tail-called function's address. Only 2 registers are
4497// free for argument passing (inreg). Tail call optimization is performed
4498// provided:
4499// * tailcallopt is enabled
4500// * caller/callee are fastcc
4501// On X86_64 architecture with GOT-style position independent code only local
4502// (within module) calls are supported at the moment.
4503// To keep the stack aligned according to the platform ABI, the function
4504// GetAlignedArgumentStackSize ensures that the argument delta is always a
4505// multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
4506// If a tail-called callee has more arguments than the caller, the
4507// caller needs to make sure that there is room to move the RETADDR to. This is
4508// achieved by reserving an area the size of the argument delta right after the
4509// original RETADDR, but before the saved framepointer or the spilled registers
4510// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4511// stack layout:
4512// arg1
4513// arg2
4514// RETADDR
4515// [ new RETADDR
4516// move area ]
4517// (possible EBP)
4518// ESI
4519// EDI
4520// local1 ..
4521
4522/// Align the stack size, e.g. to 16n + 12, to satisfy a 16-byte alignment
4523/// requirement.
4524unsigned
4525X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4526 SelectionDAG &DAG) const {
4527 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4528 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4529   assert(StackSize % SlotSize == 0 &&
4530          "StackSize must be a multiple of SlotSize");
4531 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4532}
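A minimal worked example of the formula above, assuming a 32-bit target (SlotSize = 4) with 16-byte stack alignment:
//   StackSize = 20:  alignTo(20 + 4, 16) - 4 = 32 - 4 = 28   (= 16*1 + 12)
//   StackSize = 48:  alignTo(48 + 4, 16) - 4 = 64 - 4 = 60   (= 16*3 + 12)
// so the result is always of the form 16n + 12; on 64-bit targets
// (SlotSize = 8) it is always of the form 16n + 8.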
4533
4534/// Return true if the given stack call argument is already available in the
4535/// same position (relatively) of the caller's incoming argument stack.
4536static
4537bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4538 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4539 const X86InstrInfo *TII, const CCValAssign &VA) {
4540 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4541
4542 for (;;) {
4543 // Look through nodes that don't alter the bits of the incoming value.
4544 unsigned Op = Arg.getOpcode();
4545 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4546 Arg = Arg.getOperand(0);
4547 continue;
4548 }
4549 if (Op == ISD::TRUNCATE) {
4550 const SDValue &TruncInput = Arg.getOperand(0);
4551 if (TruncInput.getOpcode() == ISD::AssertZext &&
4552 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4553 Arg.getValueType()) {
4554 Arg = TruncInput.getOperand(0);
4555 continue;
4556 }
4557 }
4558 break;
4559 }
4560
4561   int FI = INT_MAX;
4562 if (Arg.getOpcode() == ISD::CopyFromReg) {
4563 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4564 if (!VR.isVirtual())
4565 return false;
4566 MachineInstr *Def = MRI->getVRegDef(VR);
4567 if (!Def)
4568 return false;
4569 if (!Flags.isByVal()) {
4570 if (!TII->isLoadFromStackSlot(*Def, FI))
4571 return false;
4572 } else {
4573 unsigned Opcode = Def->getOpcode();
4574 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4575 Opcode == X86::LEA64_32r) &&
4576 Def->getOperand(1).isFI()) {
4577 FI = Def->getOperand(1).getIndex();
4578 Bytes = Flags.getByValSize();
4579 } else
4580 return false;
4581 }
4582 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4583 if (Flags.isByVal())
4584 // ByVal argument is passed in as a pointer but it's now being
4585 // dereferenced. e.g.
4586 // define @foo(%struct.X* %A) {
4587 // tail call @bar(%struct.X* byval %A)
4588 // }
4589 return false;
4590 SDValue Ptr = Ld->getBasePtr();
4591 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4592 if (!FINode)
4593 return false;
4594 FI = FINode->getIndex();
4595 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4596 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4597 FI = FINode->getIndex();
4598 Bytes = Flags.getByValSize();
4599 } else
4600 return false;
4601
4602   assert(FI != INT_MAX);
4603 if (!MFI.isFixedObjectIndex(FI))
4604 return false;
4605
4606 if (Offset != MFI.getObjectOffset(FI))
4607 return false;
4608
4609 // If this is not byval, check that the argument stack object is immutable.
4610 // inalloca and argument copy elision can create mutable argument stack
4611 // objects. Byval objects can be mutated, but a byval call intends to pass the
4612 // mutated memory.
4613 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4614 return false;
4615
4616 if (VA.getLocVT().getFixedSizeInBits() >
4617 Arg.getValueSizeInBits().getFixedSize()) {
4618 // If the argument location is wider than the argument type, check that any
4619 // extension flags match.
4620 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4621 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4622 return false;
4623 }
4624 }
4625
4626 return Bytes == MFI.getObjectSize(FI);
4627}
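A hedged illustration of an argument this predicate accepts (the IR is hypothetical): in a 32-bit function f(i32 %a, i32 %b) that ends in 'tail call void @g(i32 %a, i32 %b)', each outgoing stack argument traces back, through the extends and truncates stripped above, to a load from the caller's own fixed, immutable incoming-argument slot of the same offset and size, so the check succeeds and the sibcall needs no stack shuffling.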
4628
4629/// Check whether the call is eligible for tail call optimization. Targets
4630/// that want to do tail call optimization should implement this function.
4631bool X86TargetLowering::IsEligibleForTailCallOptimization(
4632 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4633 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4634 const SmallVectorImpl<ISD::OutputArg> &Outs,
4635 const SmallVectorImpl<SDValue> &OutVals,
4636 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4637 if (!mayTailCallThisCC(CalleeCC))
4638 return false;
4639
4640 // If -tailcallopt is specified, make fastcc functions tail-callable.
4641 MachineFunction &MF = DAG.getMachineFunction();
4642 const Function &CallerF = MF.getFunction();
4643
4644 // If the function return type is x86_fp80 and the callee return type is not,
4645 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4646 // perform a tailcall optimization here.
4647 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4648 return false;
4649
4650 CallingConv::ID CallerCC = CallerF.getCallingConv();
4651 bool CCMatch = CallerCC == CalleeCC;
4652 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4653 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4654 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4655 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
4656
4657 // Win64 functions have extra shadow space for argument homing. Don't do the
4658 // sibcall if the caller and callee have mismatched expectations for this
4659 // space.
4660 if (IsCalleeWin64 != IsCallerWin64)
4661 return false;
4662
4663 if (IsGuaranteeTCO) {
4664 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4665 return true;
4666 return false;
4667 }
4668
4669 // Look for obvious safe cases to perform tail call optimization that do not
4670 // require ABI changes. This is what gcc calls sibcall.
4671
4672 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4673 // emit a special epilogue.
4674 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4675 if (RegInfo->hasStackRealignment(MF))
4676 return false;
4677
4678 // Also avoid sibcall optimization if either caller or callee uses struct
4679 // return semantics.
4680 if (isCalleeStructRet || isCallerStructRet)
4681 return false;
4682
4683 // Do not sibcall optimize vararg calls unless all arguments are passed via
4684 // registers.
4685 LLVMContext &C = *DAG.getContext();
4686 if (isVarArg && !Outs.empty()) {
4687 // Optimizing for varargs on Win64 is unlikely to be safe without
4688 // additional testing.
4689 if (IsCalleeWin64 || IsCallerWin64)
4690 return false;
4691
4692 SmallVector<CCValAssign, 16> ArgLocs;
4693 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4694
4695 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4696 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4697 if (!ArgLocs[i].isRegLoc())
4698 return false;
4699 }
4700
4701 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4702 // stack. Therefore, if it's not used by the call it is not safe to optimize
4703 // this into a sibcall.
4704 bool Unused = false;
4705 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4706 if (!Ins[i].Used) {
4707 Unused = true;
4708 break;
4709 }
4710 }
4711 if (Unused) {
4712 SmallVector<CCValAssign, 16> RVLocs;
4713 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4714 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4715 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4716 CCValAssign &VA = RVLocs[i];
4717 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4718 return false;
4719 }
4720 }
4721
4722 // Check that the call results are passed in the same way.
4723 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4724 RetCC_X86, RetCC_X86))
4725 return false;
4726 // The callee has to preserve all registers the caller needs to preserve.
4727 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4728 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4729 if (!CCMatch) {
4730 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4731 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4732 return false;
4733 }
4734
4735 unsigned StackArgsSize = 0;
4736
4737 // If the callee takes no arguments then go on to check the results of the
4738 // call.
4739 if (!Outs.empty()) {
4740 // Check if stack adjustment is needed. For now, do not do this if any
4741 // argument is passed on the stack.
4742 SmallVector<CCValAssign, 16> ArgLocs;
4743 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4744
4745 // Allocate shadow area for Win64
4746 if (IsCalleeWin64)
4747 CCInfo.AllocateStack(32, Align(8));
4748
4749 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4750 StackArgsSize = CCInfo.getNextStackOffset();
4751
4752 if (CCInfo.getNextStackOffset()) {
4753 // Check if the arguments are already laid out in the right way as
4754 // the caller's fixed stack objects.
4755 MachineFrameInfo &MFI = MF.getFrameInfo();
4756 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4757 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4758 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4759 CCValAssign &VA = ArgLocs[i];
4760 SDValue Arg = OutVals[i];
4761 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4762 if (VA.getLocInfo() == CCValAssign::Indirect)
4763 return false;
4764 if (!VA.isRegLoc()) {
4765 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4766 MFI, MRI, TII, VA))
4767 return false;
4768 }
4769 }
4770 }
4771
4772 bool PositionIndependent = isPositionIndependent();
4773 // If the tailcall address may be in a register, then make sure it's
4774 // possible to register allocate for it. In 32-bit, the call address can
4775 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4776 // callee-saved registers are restored. These happen to be the same
4777 // registers used to pass 'inreg' arguments so watch out for those.
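 // For instance (illustrative only): in 32-bit PIC code MaxInRegs is 2, since
 // one extra register is needed to form the callee address, so two 'inreg'
 // arguments already landing in EAX and ECX exhaust the budget and the sibcall
 // is rejected in the loop below.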
4778 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4779 !isa<ExternalSymbolSDNode>(Callee)) ||
4780 PositionIndependent)) {
4781 unsigned NumInRegs = 0;
4782 // In PIC we need an extra register to formulate the address computation
4783 // for the callee.
4784 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4785
4786 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4787 CCValAssign &VA = ArgLocs[i];
4788 if (!VA.isRegLoc())
4789 continue;
4790 Register Reg = VA.getLocReg();
4791 switch (Reg) {
4792 default: break;
4793 case X86::EAX: case X86::EDX: case X86::ECX:
4794 if (++NumInRegs == MaxInRegs)
4795 return false;
4796 break;
4797 }
4798 }
4799 }
4800
4801 const MachineRegisterInfo &MRI = MF.getRegInfo();
4802 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4803 return false;
4804 }
4805
4806 bool CalleeWillPop =
4807 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4808 MF.getTarget().Options.GuaranteedTailCallOpt);
4809
4810 if (unsigned BytesToPop =
4811 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4812 // If we have bytes to pop, the callee must pop them.
4813 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4814 if (!CalleePopMatches)
4815 return false;
4816 } else if (CalleeWillPop && StackArgsSize > 0) {
4817 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4818 return false;
4819 }
4820
4821 return true;
4822}
4823
4824FastISel *
4825X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4826 const TargetLibraryInfo *libInfo) const {
4827 return X86::createFastISel(funcInfo, libInfo);
4828}
4829
4830//===----------------------------------------------------------------------===//
4831// Other Lowering Hooks
4832//===----------------------------------------------------------------------===//
4833
4834static bool MayFoldLoad(SDValue Op) {
4835 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4836}
4837
4838static bool MayFoldIntoStore(SDValue Op) {
4839 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4840}
4841
4842static bool MayFoldIntoZeroExtend(SDValue Op) {
4843 if (Op.hasOneUse()) {
4844 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4845 return (ISD::ZERO_EXTEND == Opcode);
4846 }
4847 return false;
4848}
4849
4850static bool isTargetShuffle(unsigned Opcode) {
4851 switch(Opcode) {
4852 default: return false;
4853 case X86ISD::BLENDI:
4854 case X86ISD::PSHUFB:
4855 case X86ISD::PSHUFD:
4856 case X86ISD::PSHUFHW:
4857 case X86ISD::PSHUFLW:
4858 case X86ISD::SHUFP:
4859 case X86ISD::INSERTPS:
4860 case X86ISD::EXTRQI:
4861 case X86ISD::INSERTQI:
4862 case X86ISD::VALIGN:
4863 case X86ISD::PALIGNR:
4864 case X86ISD::VSHLDQ:
4865 case X86ISD::VSRLDQ:
4866 case X86ISD::MOVLHPS:
4867 case X86ISD::MOVHLPS:
4868 case X86ISD::MOVSHDUP:
4869 case X86ISD::MOVSLDUP:
4870 case X86ISD::MOVDDUP:
4871 case X86ISD::MOVSS:
4872 case X86ISD::MOVSD:
4873 case X86ISD::UNPCKL:
4874 case X86ISD::UNPCKH:
4875 case X86ISD::VBROADCAST:
4876 case X86ISD::VPERMILPI:
4877 case X86ISD::VPERMILPV:
4878 case X86ISD::VPERM2X128:
4879 case X86ISD::SHUF128:
4880 case X86ISD::VPERMIL2:
4881 case X86ISD::VPERMI:
4882 case X86ISD::VPPERM:
4883 case X86ISD::VPERMV:
4884 case X86ISD::VPERMV3:
4885 case X86ISD::VZEXT_MOVL:
4886 return true;
4887 }
4888}
4889
4890static bool isTargetShuffleVariableMask(unsigned Opcode) {
4891 switch (Opcode) {
4892 default: return false;
4893 // Target Shuffles.
4894 case X86ISD::PSHUFB:
4895 case X86ISD::VPERMILPV:
4896 case X86ISD::VPERMIL2:
4897 case X86ISD::VPPERM:
4898 case X86ISD::VPERMV:
4899 case X86ISD::VPERMV3:
4900 return true;
4901 // 'Faux' Target Shuffles.
4902 case ISD::OR:
4903 case ISD::AND:
4904 case X86ISD::ANDNP:
4905 return true;
4906 }
4907}
4908
4909static bool isTargetShuffleSplat(SDValue Op) {
4910 unsigned Opcode = Op.getOpcode();
4911 if (Opcode == ISD::EXTRACT_SUBVECTOR)
4912 return isTargetShuffleSplat(Op.getOperand(0));
4913 return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
4914}
4915
4916SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4917 MachineFunction &MF = DAG.getMachineFunction();
4918 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4919 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4920 int ReturnAddrIndex = FuncInfo->getRAIndex();
4921
4922 if (ReturnAddrIndex == 0) {
4923 // Set up a frame object for the return address.
4924 unsigned SlotSize = RegInfo->getSlotSize();
4925 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4926 -(int64_t)SlotSize,
4927 false);
4928 FuncInfo->setRAIndex(ReturnAddrIndex);
4929 }
4930
4931 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4932}
4933
4934bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4935 bool hasSymbolicDisplacement) {
4936 // Offset should fit into 32 bit immediate field.
4937 if (!isInt<32>(Offset))
4938 return false;
4939
4940 // If we don't have a symbolic displacement - we don't have any extra
4941 // restrictions.
4942 if (!hasSymbolicDisplacement)
4943 return true;
4944
4945 // FIXME: Some tweaks might be needed for medium code model.
4946 if (M != CodeModel::Small && M != CodeModel::Kernel)
4947 return false;
4948
4949 // For the small code model we assume that the last object ends at least 16MB
4950 // before the 2^31 boundary. We may also accept fairly large negative constants,
4951 // knowing that all objects are in the positive half of the address space.
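 // Worked example (illustrative): if an object ends no later than
 // 2GB - 16MB = 0x7F000000, then adding an offset below 16*1024*1024 keeps the
 // final address under the 0x80000000 (2^31) limit of a signed 32-bit
 // displacement.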
4952 if (M == CodeModel::Small && Offset < 16*1024*1024)
4953 return true;
4954
4955 // For the kernel code model we know that all objects reside in the negative
4956 // half of the 32-bit address space. We must not accept negative offsets, since
4957 // they may fall out of range, but we may accept fairly large positive ones.
4958 if (M == CodeModel::Kernel && Offset >= 0)
4959 return true;
4960
4961 return false;
4962}
4963
4964/// Determines whether the callee is required to pop its own arguments.
4965/// Callee pop is necessary to support tail calls.
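/// Illustrative background: on 32-bit x86, stdcall/fastcall/thiscall/vectorcall
/// return with 'ret imm16', so the callee removes its own stack arguments; the
/// 64-bit variants of these conventions do not, hence the '!is64Bit' result
/// below.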
4966bool X86::isCalleePop(CallingConv::ID CallingConv,
4967 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4968 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4969 // can guarantee TCO.
4970 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4971 return true;
4972
4973 switch (CallingConv) {
4974 default:
4975 return false;
4976 case CallingConv::X86_StdCall:
4977 case CallingConv::X86_FastCall:
4978 case CallingConv::X86_ThisCall:
4979 case CallingConv::X86_VectorCall:
4980 return !is64Bit;
4981 }
4982}
4983
4984/// Return true if the condition is a signed comparison operation.
4985static bool isX86CCSigned(unsigned X86CC) {
4986 switch (X86CC) {
4987 default:
4988 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4988)
;
4989 case X86::COND_E:
4990 case X86::COND_NE:
4991 case X86::COND_B:
4992 case X86::COND_A:
4993 case X86::COND_BE:
4994 case X86::COND_AE:
4995 return false;
4996 case X86::COND_G:
4997 case X86::COND_GE:
4998 case X86::COND_L:
4999 case X86::COND_LE:
5000 return true;
5001 }
5002}
5003
5004static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
5005 switch (SetCCOpcode) {
5006 default: llvm_unreachable("Invalid integer condition!");
5007 case ISD::SETEQ: return X86::COND_E;
5008 case ISD::SETGT: return X86::COND_G;
5009 case ISD::SETGE: return X86::COND_GE;
5010 case ISD::SETLT: return X86::COND_L;
5011 case ISD::SETLE: return X86::COND_LE;
5012 case ISD::SETNE: return X86::COND_NE;
5013 case ISD::SETULT: return X86::COND_B;
5014 case ISD::SETUGT: return X86::COND_A;
5015 case ISD::SETULE: return X86::COND_BE;
5016 case ISD::SETUGE: return X86::COND_AE;
5017 }
5018}
5019
5020/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
5021/// condition code, returning the condition code and the LHS/RHS of the
5022/// comparison to make.
5023static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
5024 bool isFP, SDValue &LHS, SDValue &RHS,
5025 SelectionDAG &DAG) {
5026 if (!isFP) {
5027 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5028 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
5029 // X > -1 -> X == 0, jump !sign.
5030 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5031 return X86::COND_NS;
5032 }
5033 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
5034 // X < 0 -> X == 0, jump on sign.
5035 return X86::COND_S;
5036 }
5037 if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
5038 // X >= 0 -> X == 0, jump on !sign.
5039 return X86::COND_NS;
5040 }
5041 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5042 // X < 1 -> X <= 0
5043 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5044 return X86::COND_LE;
5045 }
5046 }
5047
5048 return TranslateIntegerX86CC(SetCCOpcode);
5049 }
5050
5051 // First determine if it is required or is profitable to flip the operands.
5052
5053 // If LHS is a foldable load, but RHS is not, flip the condition.
5054 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5055 !ISD::isNON_EXTLoad(RHS.getNode())) {
5056 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5057 std::swap(LHS, RHS);
5058 }
5059
5060 switch (SetCCOpcode) {
5061 default: break;
5062 case ISD::SETOLT:
5063 case ISD::SETOLE:
5064 case ISD::SETUGT:
5065 case ISD::SETUGE:
5066 std::swap(LHS, RHS);
5067 break;
5068 }
5069
5070 // On a floating point condition, the flags are set as follows:
5071 // ZF PF CF op
5072 // 0 | 0 | 0 | X > Y
5073 // 0 | 0 | 1 | X < Y
5074 // 1 | 0 | 0 | X == Y
5075 // 1 | 1 | 1 | unordered
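 // Worked example (illustrative): for SETOLT (ordered X < Y) the operands were
 // swapped above, so the compare now computes Y versus X; per the table, "Y > X"
 // is ZF == 0 && CF == 0, i.e. the "above" condition, which is why SETOLT maps
 // to COND_A below.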
5076 switch (SetCCOpcode) {
5077 default: llvm_unreachable("Condcode should be pre-legalized away");
5078 case ISD::SETUEQ:
5079 case ISD::SETEQ: return X86::COND_E;
5080 case ISD::SETOLT: // flipped
5081 case ISD::SETOGT:
5082 case ISD::SETGT: return X86::COND_A;
5083 case ISD::SETOLE: // flipped
5084 case ISD::SETOGE:
5085 case ISD::SETGE: return X86::COND_AE;
5086 case ISD::SETUGT: // flipped
5087 case ISD::SETULT:
5088 case ISD::SETLT: return X86::COND_B;
5089 case ISD::SETUGE: // flipped
5090 case ISD::SETULE:
5091 case ISD::SETLE: return X86::COND_BE;
5092 case ISD::SETONE:
5093 case ISD::SETNE: return X86::COND_NE;
5094 case ISD::SETUO: return X86::COND_P;
5095 case ISD::SETO: return X86::COND_NP;
5096 case ISD::SETOEQ:
5097 case ISD::SETUNE: return X86::COND_INVALID;
5098 }
5099}
5100
5101/// Is there a floating point cmov for the specific X86 condition code?
5102/// The current x86 ISA includes the following FP cmov instructions:
5103/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5104static bool hasFPCMov(unsigned X86CC) {
5105 switch (X86CC) {
5106 default:
5107 return false;
5108 case X86::COND_B:
5109 case X86::COND_BE:
5110 case X86::COND_E:
5111 case X86::COND_P:
5112 case X86::COND_A:
5113 case X86::COND_AE:
5114 case X86::COND_NE:
5115 case X86::COND_NP:
5116 return true;
5117 }
5118}
5119
5120
5121bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5122 const CallInst &I,
5123 MachineFunction &MF,
5124 unsigned Intrinsic) const {
5125 Info.flags = MachineMemOperand::MONone;
5126 Info.offset = 0;
5127
5128 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5129 if (!IntrData) {
5130 switch (Intrinsic) {
5131 case Intrinsic::x86_aesenc128kl:
5132 case Intrinsic::x86_aesdec128kl:
5133 Info.opc = ISD::INTRINSIC_W_CHAIN;
5134 Info.ptrVal = I.getArgOperand(1);
5135 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5136 Info.align = Align(1);
5137 Info.flags |= MachineMemOperand::MOLoad;
5138 return true;
5139 case Intrinsic::x86_aesenc256kl:
5140 case Intrinsic::x86_aesdec256kl:
5141 Info.opc = ISD::INTRINSIC_W_CHAIN;
5142 Info.ptrVal = I.getArgOperand(1);
5143 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5144 Info.align = Align(1);
5145 Info.flags |= MachineMemOperand::MOLoad;
5146 return true;
5147 case Intrinsic::x86_aesencwide128kl:
5148 case Intrinsic::x86_aesdecwide128kl:
5149 Info.opc = ISD::INTRINSIC_W_CHAIN;
5150 Info.ptrVal = I.getArgOperand(0);
5151 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5152 Info.align = Align(1);
5153 Info.flags |= MachineMemOperand::MOLoad;
5154 return true;
5155 case Intrinsic::x86_aesencwide256kl:
5156 case Intrinsic::x86_aesdecwide256kl:
5157 Info.opc = ISD::INTRINSIC_W_CHAIN;
5158 Info.ptrVal = I.getArgOperand(0);
5159 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5160 Info.align = Align(1);
5161 Info.flags |= MachineMemOperand::MOLoad;
5162 return true;
5163 }
5164 return false;
5165 }
5166
5167 switch (IntrData->Type) {
5168 case TRUNCATE_TO_MEM_VI8:
5169 case TRUNCATE_TO_MEM_VI16:
5170 case TRUNCATE_TO_MEM_VI32: {
5171 Info.opc = ISD::INTRINSIC_VOID;
5172 Info.ptrVal = I.getArgOperand(0);
5173 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5174 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5175 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5176 ScalarVT = MVT::i8;
5177 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5178 ScalarVT = MVT::i16;
5179 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5180 ScalarVT = MVT::i32;
5181
5182 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5183 Info.align = Align(1);
5184 Info.flags |= MachineMemOperand::MOStore;
5185 break;
5186 }
5187 case GATHER:
5188 case GATHER_AVX2: {
5189 Info.opc = ISD::INTRINSIC_W_CHAIN;
5190 Info.ptrVal = nullptr;
5191 MVT DataVT = MVT::getVT(I.getType());
5192 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5193 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5194 IndexVT.getVectorNumElements());
5195 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5196 Info.align = Align(1);
5197 Info.flags |= MachineMemOperand::MOLoad;
5198 break;
5199 }
5200 case SCATTER: {
5201 Info.opc = ISD::INTRINSIC_VOID;
5202 Info.ptrVal = nullptr;
5203 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5204 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5205 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5206 IndexVT.getVectorNumElements());
5207 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5208 Info.align = Align(1);
5209 Info.flags |= MachineMemOperand::MOStore;
5210 break;
5211 }
5212 default:
5213 return false;
5214 }
5215
5216 return true;
5217}
5218
5219/// Returns true if the target can instruction select the
5220/// specified FP immediate natively. If false, the legalizer will
5221/// materialize the FP immediate as a load from a constant pool.
5222bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5223 bool ForCodeSize) const {
5224 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
5225 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
5226 return true;
5227 }
5228 return false;
5229}
5230
5231bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5232 ISD::LoadExtType ExtTy,
5233 EVT NewVT) const {
5234 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")(static_cast <bool> (cast<LoadSDNode>(Load)->isSimple
() && "illegal to narrow") ? void (0) : __assert_fail
("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5234, __extension__ __PRETTY_FUNCTION__))
;
5235
5236 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5237 // relocation must target a movq or addq instruction: don't let the load shrink.
5238 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5239 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5240 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5241 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5242
5243 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5244 // those uses are extracted directly into a store, then the extract + store
5245 // can be store-folded. Therefore, it's probably not worth splitting the load.
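 // For example (illustrative): a 256-bit load whose non-chain uses are all
 // EXTRACT_SUBVECTOR nodes that feed stores is kept whole here, since each
 // extract + store pair can be folded into a 128-bit store on its own.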
5246 EVT VT = Load->getValueType(0);
5247 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5248 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5249 // Skip uses of the chain value. Result 0 of the node is the load value.
5250 if (UI.getUse().getResNo() != 0)
5251 continue;
5252
5253 // If this use is not an extract + store, it's probably worth splitting.
5254 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5255 UI->use_begin()->getOpcode() != ISD::STORE)
5256 return true;
5257 }
5258 // All non-chain uses are extract + store.
5259 return false;
5260 }
5261
5262 return true;
5263}
5264
5265/// Returns true if it is beneficial to convert a load of a constant
5266/// to just the constant itself.
5267bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5268 Type *Ty) const {
5269 assert(Ty->isIntegerTy())(static_cast <bool> (Ty->isIntegerTy()) ? void (0) :
__assert_fail ("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5269, __extension__ __PRETTY_FUNCTION__))
;
5270
5271 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5272 if (BitSize == 0 || BitSize > 64)
5273 return false;
5274 return true;
5275}
5276
5277bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5278 // If we are using XMM registers in the ABI and the condition of the select is
5279 // a floating-point compare and we have blendv or conditional move, then it is
5280 // cheaper to select instead of doing a cross-register move and creating a
5281 // load that depends on the compare result.
5282 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5283 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5284}
5285
5286bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5287 // TODO: It might be a win to ease or lift this restriction, but the generic
5288 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5289 if (VT.isVector() && Subtarget.hasAVX512())
5290 return false;
5291
5292 return true;
5293}
5294
5295bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5296 SDValue C) const {
5297 // TODO: We handle scalars using custom code, but generic combining could make
5298 // that unnecessary.
5299 APInt MulC;
5300 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5301 return false;
5302
5303 // Find the type this will be legalized to. Otherwise we might prematurely
5304 // convert this to shl+add/sub and then still have to type legalize those ops.
5305 // Another choice would be to defer the decision for illegal types until
5306 // after type legalization. But constant splat vectors of i64 can't make it
5307 // through type legalization on 32-bit targets so we would need to special
5308 // case vXi64.
5309 while (getTypeAction(Context, VT) != TypeLegal)
5310 VT = getTypeToTransformTo(Context, VT);
5311
5312 // If vector multiply is legal, assume that's faster than shl + add/sub.
5313 // TODO: Multiply is a complex op with higher latency and lower throughput in
5314 // most implementations, so this check could be loosened based on type
5315 // and/or a CPU attribute.
5316 if (isOperationLegal(ISD::MUL, VT))
5317 return false;
5318
5319 // shl+add, shl+sub, shl+add+neg
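 // Worked examples (illustrative): MulC == 9 gives (MulC - 1) == 8, so X*9 is
 // (X << 3) + X; MulC == 7 gives (MulC + 1) == 8, so X*7 is (X << 3) - X;
 // MulC == -7 gives (1 - MulC) == 8, so X*-7 is X - (X << 3); MulC == -9 gives
 // -(MulC + 1) == 8, so X*-9 is -((X << 3) + X).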
5320 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5321 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5322}
5323
5324bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5325 unsigned Index) const {
5326 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5327 return false;
5328
5329 // Mask vectors support all subregister combinations and operations that
5330 // extract half of vector.
5331 if (ResVT.getVectorElementType() == MVT::i1)
5332 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5333 (Index == ResVT.getVectorNumElements()));
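 // For example (illustrative): with ResVT == v8i1 and SrcVT == v16i1, only
 // Index 0 (the lower half) or Index 8 (the upper half) satisfies the check
 // above.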
5334
5335 return (Index % ResVT.getVectorNumElements()) == 0;
5336}
5337
5338bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5339 unsigned Opc = VecOp.getOpcode();
5340
5341 // Assume target opcodes can't be scalarized.
5342 // TODO - do we have any exceptions?
5343 if (Opc >= ISD::BUILTIN_OP_END)
5344 return false;
5345
5346 // If the vector op is not supported, try to convert to scalar.
5347 EVT VecVT = VecOp.getValueType();
5348 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5349 return true;
5350
5351 // If the vector op is supported, but the scalar op is not, the transform may
5352 // not be worthwhile.
5353 EVT ScalarVT = VecVT.getScalarType();
5354 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5355}
5356
5357bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5358 bool) const {
5359 // TODO: Allow vectors?
5360 if (VT.isVector())
5361 return false;
5362 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5363}
5364
5365bool X86TargetLowering::isCheapToSpeculateCttz() const {
5366 // Speculate cttz only if we can directly use TZCNT.
5367 return Subtarget.hasBMI();
5368}
5369
5370bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5371 // Speculate ctlz only if we can directly use LZCNT.
5372 return Subtarget.hasLZCNT();
5373}
5374
5375bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5376 const SelectionDAG &DAG,
5377 const MachineMemOperand &MMO) const {
5378 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5379 BitcastVT.getVectorElementType() == MVT::i1)
5380 return false;
5381
5382 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5383 return false;
5384
5385 // If both types are legal vectors, it's always ok to convert them.
5386 if (LoadVT.isVector() && BitcastVT.isVector() &&
5387 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5388 return true;
5389
5390 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5391}
5392
5393bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5394 const SelectionDAG &DAG) const {
5395 // Do not merge to float value size (128 bits) if no implicit
5396 // float attribute is set.
5397 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
5398 Attribute::NoImplicitFloat);
5399
5400 if (NoFloat) {
5401 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5402 return (MemVT.getSizeInBits() <= MaxIntSize);
5403 }
5404 // Make sure we don't merge greater than our preferred vector
5405 // width.
5406 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5407 return false;
5408
5409 return true;
5410}
5411
5412bool X86TargetLowering::isCtlzFast() const {
5413 return Subtarget.hasFastLZCNT();
5414}
5415
5416bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5417 const Instruction &AndI) const {
5418 return true;
5419}
5420
5421bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5422 EVT VT = Y.getValueType();
5423
5424 if (VT.isVector())
5425 return false;
5426
5427 if (!Subtarget.hasBMI())
5428 return false;
5429
5430 // There are only 32-bit and 64-bit forms for 'andn'.
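 // Background sketch (illustrative): BMI's ANDN computes ~src1 & src2 in one
 // instruction, so a compare such as (~X & Y) != 0 needs no separate NOT when
 // X and Y are i32 or i64.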
5431 if (VT != MVT::i32 && VT != MVT::i64)
5432 return false;
5433
5434 return !isa<ConstantSDNode>(Y);
5435}
5436
5437bool X86TargetLowering::hasAndNot(SDValue Y) const {
5438 EVT VT = Y.getValueType();
5439
5440 if (!VT.isVector())
5441 return hasAndNotCompare(Y);
5442
5443 // Vector.
5444
5445 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5446 return false;
5447
5448 if (VT == MVT::v4i32)
5449 return true;
5450
5451 return Subtarget.hasSSE2();
5452}
5453
5454bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5455 return X.getValueType().isScalarInteger(); // 'bt'
5456}
5457
5458bool X86TargetLowering::
5459 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5460 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5461 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5462 SelectionDAG &DAG) const {
5463 // Does baseline recommend not to perform the fold by default?
5464 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5465 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5466 return false;
5467 // For scalars this transform is always beneficial.
5468 if (X.getValueType().isScalarInteger())
5469 return true;
5470 // If all the shift amounts are identical, then transform is beneficial even
5471 // with rudimentary SSE2 shifts.
5472 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5473 return true;
5474 // If we have AVX2 with its powerful shift operations, then it's also good.
5475 if (Subtarget.hasAVX2())
5476 return true;
5477 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5478 return NewShiftOpcode == ISD::SHL;
5479}
5480
5481bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5482 const SDNode *N, CombineLevel Level) const {
5483 assert(((N->getOpcode() == ISD::SHL &&(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5487, __extension__ __PRETTY_FUNCTION__))
5484 N->getOperand(0).getOpcode() == ISD::SRL) ||(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5487, __extension__ __PRETTY_FUNCTION__))
5485 (N->getOpcode() == ISD::SRL &&(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5487, __extension__ __PRETTY_FUNCTION__))
5486 N->getOperand(0).getOpcode() == ISD::SHL)) &&(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5487, __extension__ __PRETTY_FUNCTION__))
5487 "Expected shift-shift mask")(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5487, __extension__ __PRETTY_FUNCTION__))
;
5488 EVT VT = N->getValueType(0);
5489 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5490 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5491 // Only fold if the shift values are equal - so it folds to AND.
5492 // TODO - we should fold if either is a non-uniform vector but we don't do
5493 // the fold for non-splats yet.
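 // For example (illustrative): with fast scalar shift masks, i32
 // (X >> 4) << 4 folds to X & 0xFFFFFFF0 and (X << 4) >> 4 folds to
 // X & 0x0FFFFFFF, but only because both shift amounts are 4.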
5494 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5495 }
5496 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5497}
5498
5499bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5500 EVT VT = Y.getValueType();
5501
5502 // For vectors, we don't have a preference, but we probably want a mask.
5503 if (VT.isVector())
5504 return false;
5505
5506 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5507 if (VT == MVT::i64 && !Subtarget.is64Bit())
5508 return false;
5509
5510 return true;
5511}
5512
5513bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5514 SDNode *N) const {
5515 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5516 !Subtarget.isOSWindows())
5517 return false;
5518 return true;
5519}
5520
5521bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5522 // Any legal vector type can be splatted more efficiently than
5523 // loading/spilling from memory.
5524 return isTypeLegal(VT);
5525}
5526
5527MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5528 MVT VT = MVT::getIntegerVT(NumBits);
5529 if (isTypeLegal(VT))
5530 return VT;
5531
5532 // PMOVMSKB can handle this.
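 // Illustrative idiom: a 128-bit equality test can be done as PCMPEQB followed
 // by PMOVMSKB, then comparing the resulting 16-bit mask against 0xFFFF.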
5533 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5534 return MVT::v16i8;
5535
5536 // VPMOVMSKB can handle this.
5537 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5538 return MVT::v32i8;
5539
5540 // TODO: Allow 64-bit type for 32-bit target.
5541 // TODO: 512-bit types should be allowed, but make sure that those
5542 // cases are handled in combineVectorSizedSetCCEquality().
5543
5544 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5545}
5546
5547/// Val is the undef sentinel value or equal to the specified value.
5548static bool isUndefOrEqual(int Val, int CmpVal) {
5549 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5550}
5551
5552/// Return true if every element in Mask is the undef sentinel value or equal to
5553/// the specified value.
5554static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
5555 return llvm::all_of(Mask, [CmpVal](int M) {
5556 return (M == SM_SentinelUndef) || (M == CmpVal);
5557 });
5558}
5559
5560/// Val is either the undef or zero sentinel value.
5561static bool isUndefOrZero(int Val) {
5562 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5563}
5564
5565/// Return true if every element in Mask, beginning from position Pos and ending
5566/// in Pos+Size is the undef sentinel value.
5567static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5568 return llvm::all_of(Mask.slice(Pos, Size),
5569 [](int M) { return M == SM_SentinelUndef; });
5570}
5571
5572/// Return true if the mask creates a vector whose lower half is undefined.
5573static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5574 unsigned NumElts = Mask.size();
5575 return isUndefInRange(Mask, 0, NumElts / 2);
5576}
5577
5578/// Return true if the mask creates a vector whose upper half is undefined.
5579static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5580 unsigned NumElts = Mask.size();
5581 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5582}
5583
5584/// Return true if Val falls within the specified half-open range [Low, Hi).
5585static bool isInRange(int Val, int Low, int Hi) {
5586 return (Val >= Low && Val < Hi);
5587}
5588
5589/// Return true if the value of any element in Mask falls within the specified
5590/// half-open range [Low, Hi).
5591static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5592 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5593}
5594
5595/// Return true if the value of any element in Mask is the zero sentinel value.
5596static bool isAnyZero(ArrayRef<int> Mask) {
5597 return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
5598}
5599
5600/// Return true if the value of any element in Mask is the zero or undef
5601/// sentinel value.
5602static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
5603 return llvm::any_of(Mask, [](int M) {
5604 return M == SM_SentinelZero || M == SM_SentinelUndef;
5605 });
5606}
5607
5608/// Return true if Val is undef or if its value falls within the
5609/// specified half-open range [Low, Hi).
5610static bool isUndefOrInRange(int Val, int Low, int Hi) {
5611 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5612}
5613
5614/// Return true if every element in Mask is undef or if its value
5615/// falls within the specified half-open range [Low, Hi).
5616static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5617 return llvm::all_of(
5618 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5619}
5620
5621/// Return true if Val is undef, zero or if its value falls within the
5622/// specified half-open range [Low, Hi).
5623static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5624 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5625}
5626
5627/// Return true if every element in Mask is undef, zero or if its value
5628/// falls within the specified half-open range [Low, Hi).
5629static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5630 return llvm::all_of(
5631 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5632}
5633
5634/// Return true if every element in Mask, beginning
5635/// from position Pos and ending in Pos + Size, falls within the specified
5636/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5637static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5638 unsigned Size, int Low, int Step = 1) {
5639 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5640 if (!isUndefOrEqual(Mask[i], Low))
5641 return false;
5642 return true;
5643}
5644
5645/// Return true if every element in Mask, beginning
5646/// from position Pos and ending in Pos+Size, falls within the specified
5647/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step), or is undef or zero.
5648static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5649 unsigned Size, int Low,
5650 int Step = 1) {
5651 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5652 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5653 return false;
5654 return true;
5655}
5656
5657/// Return true if every element in Mask, beginning
5658/// from position Pos and ending in Pos+Size is undef or is zero.
5659static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5660 unsigned Size) {
5661 return llvm::all_of(Mask.slice(Pos, Size),
5662 [](int M) { return isUndefOrZero(M); });
5663}
5664
5665/// Helper function to test whether a shuffle mask could be
5666/// simplified by widening the elements being shuffled.
5667///
5668/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5669/// leaves it in an unspecified state.
5670///
5671/// NOTE: This must handle normal vector shuffle masks and *target* vector
5672/// shuffle masks. The latter have the special property of a '-2' representing
5673/// a zero-ed lane of a vector.
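/// Illustrative example: the 4-element mask {0, 1, 6, 7} widens to the
/// 2-element mask {0, 3}, while {0, 2, 4, 6} cannot be widened because 0 and 2
/// are not an adjacent even/odd pair.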
5674static bool canWidenShuffleElements(ArrayRef<int> Mask,
5675 SmallVectorImpl<int> &WidenedMask) {
5676 WidenedMask.assign(Mask.size() / 2, 0);
5677 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5678 int M0 = Mask[i];
5679 int M1 = Mask[i + 1];
5680
5681 // If both elements are undef, it's trivial.
5682 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5683 WidenedMask[i / 2] = SM_SentinelUndef;
5684 continue;
5685 }
5686
5687 // Check for an undef mask and a mask value properly aligned to fit with
5688 // a pair of values. If we find such a case, use the non-undef mask's value.
5689 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5690 WidenedMask[i / 2] = M1 / 2;
5691 continue;
5692 }
5693 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5694 WidenedMask[i / 2] = M0 / 2;
5695 continue;
5696 }
5697
5698 // When zeroing, we need to spread the zeroing across both lanes to widen.
5699 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5700 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5701 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5702 WidenedMask[i / 2] = SM_SentinelZero;
5703 continue;
5704 }
5705 return false;
5706 }
5707
5708 // Finally check if the two mask values are adjacent and aligned with
5709 // a pair.
5710 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5711 WidenedMask[i / 2] = M0 / 2;
5712 continue;
5713 }
5714
5715 // Otherwise we can't safely widen the elements used in this shuffle.
5716 return false;
5717 }
5718 assert(WidenedMask.size() == Mask.size() / 2 &&
5719        "Incorrect size of mask after widening the elements!");
5720
5721 return true;
5722}
5723
5724static bool canWidenShuffleElements(ArrayRef<int> Mask,
5725 const APInt &Zeroable,
5726 bool V2IsZero,
5727 SmallVectorImpl<int> &WidenedMask) {
5728 // Create an alternative mask with info about zeroable elements.
5729 // Here we do not set undef elements as zeroable.
5730 SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
5731 if (V2IsZero) {
5732 assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
5733 for (int i = 0, Size = Mask.size(); i != Size; ++i)
5734 if (Mask[i] != SM_SentinelUndef && Zeroable[i])
5735 ZeroableMask[i] = SM_SentinelZero;
5736 }
5737 return canWidenShuffleElements(ZeroableMask, WidenedMask);
5738}
5739
5740static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5741 SmallVector<int, 32> WidenedMask;
5742 return canWidenShuffleElements(Mask, WidenedMask);
5743}
5744
5745// Attempt to narrow/widen shuffle mask until it matches the target number of
5746// elements.
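// For example (illustrative): scaling the 2-element mask {1, 0} up to 4
// elements gives {2, 3, 0, 1}, and widening {2, 3, 0, 1} back down to 2
// elements recovers {1, 0}.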
5747static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
5748 SmallVectorImpl<int> &ScaledMask) {
5749 unsigned NumSrcElts = Mask.size();
5750 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
5751        "Illegal shuffle scale factor");
5752
5753 // Narrowing is guaranteed to work.
5754 if (NumDstElts >= NumSrcElts) {
5755 int Scale = NumDstElts / NumSrcElts;
5756 llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask);
5757 return true;
5758 }
5759
5760 // We have to repeat the widening until we reach the target size, but we can
5761 // split out the first widening as it sets up ScaledMask for us.
5762 if (canWidenShuffleElements(Mask, ScaledMask)) {
5763 while (ScaledMask.size() > NumDstElts) {
5764 SmallVector<int, 16> WidenedMask;
5765 if (!canWidenShuffleElements(ScaledMask, WidenedMask))
5766 return false;
5767 ScaledMask = std::move(WidenedMask);
5768 }
5769 return true;
5770 }
5771
5772 return false;
5773}
5774
5775/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5776bool X86::isZeroNode(SDValue Elt) {
5777 return isNullConstant(Elt) || isNullFPConstant(Elt);
5778}
5779
5780// Build a vector of constants.
5781// Use an UNDEF node if MaskElt == -1.
5782// Split 64-bit constants in the 32-bit mode.
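// For example (illustrative): when i64 is not legal, a v2i64 mask constant
// {M0, M1} is emitted as the v4i32 build vector {M0, 0, M1, 0} and then
// bitcast back to v2i64.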
5783static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5784 const SDLoc &dl, bool IsMask = false) {
5785
5786 SmallVector<SDValue, 32> Ops;
5787 bool Split = false;
5788
5789 MVT ConstVecVT = VT;
5790 unsigned NumElts = VT.getVectorNumElements();
5791 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5792 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5793 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5794 Split = true;
5795 }
5796
5797 MVT EltVT = ConstVecVT.getVectorElementType();
5798 for (unsigned i = 0; i < NumElts; ++i) {
5799 bool IsUndef = Values[i] < 0 && IsMask;
5800 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5801 DAG.getConstant(Values[i], dl, EltVT);
5802 Ops.push_back(OpNode);
5803 if (Split)
5804 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5805 DAG.getConstant(0, dl, EltVT));
5806 }
5807 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5808 if (Split)
5809 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5810 return ConstsNode;
5811}
5812
5813static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5814 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5815 assert(Bits.size() == Undefs.getBitWidth() &&
5816        "Unequal constant and undef arrays");
5817 SmallVector<SDValue, 32> Ops;
5818 bool Split = false;
5819
5820 MVT ConstVecVT = VT;
5821 unsigned NumElts = VT.getVectorNumElements();
5822 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5823 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5824 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5825 Split = true;
5826 }
5827
5828 MVT EltVT = ConstVecVT.getVectorElementType();
5829 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5830 if (Undefs[i]) {
5831 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5832 continue;
5833 }
5834 const APInt &V = Bits[i];
5835 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
5836 if (Split) {
5837 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5838 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5839 } else if (EltVT == MVT::f32) {
5840 APFloat FV(APFloat::IEEEsingle(), V);
5841 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5842 } else if (EltVT == MVT::f64) {
5843 APFloat FV(APFloat::IEEEdouble(), V);
5844 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5845 } else {
5846 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5847 }
5848 }
5849
5850 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5851 return DAG.getBitcast(VT, ConstsNode);
5852}
5853
5854/// Returns a vector of specified type with all zero elements.
5855static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5856 SelectionDAG &DAG, const SDLoc &dl) {
5857 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
5858         VT.getVectorElementType() == MVT::i1) &&
5859        "Unexpected vector type");
5860
5861 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5862 // type. This ensures they get CSE'd. But if the integer type is not
5863 // available, use a floating-point +0.0 instead.
5864 SDValue Vec;
5865 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5866 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5867 } else if (VT.isFloatingPoint()) {
5868 Vec = DAG.getConstantFP(+0.0, dl, VT);
5869 } else if (VT.getVectorElementType() == MVT::i1) {
5870 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
5871        "Unexpected vector type");
5872 Vec = DAG.getConstant(0, dl, VT);
5873 } else {
5874 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5875 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5876 }
5877 return DAG.getBitcast(VT, Vec);
5878}
5879
5880static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5881 const SDLoc &dl, unsigned vectorWidth) {
5882 EVT VT = Vec.getValueType();
5883 EVT ElVT = VT.getVectorElementType();
5884 unsigned Factor = VT.getSizeInBits() / vectorWidth;
5885 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5886 VT.getVectorNumElements() / Factor);
5887
5888 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5889 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5890 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5891
5892 // This is the index of the first element of the vectorWidth-bit chunk
5893 // we want. Since ElemsPerChunk is a power of 2, we just need to clear the low bits.
5894 IdxVal &= ~(ElemsPerChunk - 1);
5895
5896 // If the input is a buildvector just emit a smaller one.
5897 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5898 return DAG.getBuildVector(ResultVT, dl,
5899 Vec->ops().slice(IdxVal, ElemsPerChunk));
5900
5901 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5902 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5903}
5904
5905/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5906/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5907/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5908/// instructions or a simple subregister reference. Idx is an index in the
5909/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5910/// lowering EXTRACT_VECTOR_ELT operations easier.
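/// For example (illustrative): extracting from a v8f32 with IdxVal == 5 rounds
/// the index down to 4 (ElemsPerChunk == 4), so elements 4..7 (the upper
/// 128-bit half) are returned as a v4f32.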
5911static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5912 SelectionDAG &DAG, const SDLoc &dl) {
5913 assert((Vec.getValueType().is256BitVector() ||
5914         Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
5915 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5916}
5917
5918/// Generate a DAG to grab 256-bits from a 512-bit vector.
5919static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5920 SelectionDAG &DAG, const SDLoc &dl) {
5921 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
5922 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5923}
5924
5925static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5926 SelectionDAG &DAG,