Bug Summary

File: llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 9955, column 35
Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/X86 -I /build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86 -I include -I /build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu 
-internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-command-line-argument -Wno-unknown-warning-option -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/build-llvm -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-26-234817-15343-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp

/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/IntrinsicLowering.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineJumpTableInfo.h"
39#include "llvm/CodeGen/MachineLoopInfo.h"
40#include "llvm/CodeGen/MachineModuleInfo.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/TargetLowering.h"
43#include "llvm/CodeGen/WinEHFuncInfo.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DerivedTypes.h"
47#include "llvm/IR/DiagnosticInfo.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalVariable.h"
51#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Instructions.h"
53#include "llvm/IR/Intrinsics.h"
54#include "llvm/IR/PatternMatch.h"
55#include "llvm/MC/MCAsmInfo.h"
56#include "llvm/MC/MCContext.h"
57#include "llvm/MC/MCExpr.h"
58#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/KnownBits.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Target/TargetOptions.h"
65#include <algorithm>
66#include <bitset>
67#include <cctype>
68#include <numeric>
69using namespace llvm;
70
71#define DEBUG_TYPE "x86-isel"
72
73STATISTIC(NumTailCalls, "Number of tail calls");
74
75static cl::opt<int> ExperimentalPrefInnermostLoopAlignment( // Integer command-line option controlling innermost-loop alignment.
76 "x86-experimental-pref-innermost-loop-alignment", cl::init(4), // Flag name; default value is 4 (a log2 byte count per the description).
77 cl::desc(
78 "Sets the preferable loop alignment for experiments (as log2 bytes) "
79 "for innermost loops only. If specified, this option overrides "
80 "alignment set by x86-experimental-pref-loop-alignment."),
81 cl::Hidden); // Registered with the cl::Hidden modifier.
82
83static cl::opt<bool> MulConstantOptimization( // Boolean command-line option for the 'mul x, Const' replacement.
84 "mul-constant-optimization", cl::init(true), // Flag name; enabled by default.
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden); // Registered with the cl::Hidden modifier.
88
89static cl::opt<bool> ExperimentalUnorderedISEL( // Boolean command-line option for unordered atomic load/store selection.
90 "x86-experimental-unordered-atomic-isel", cl::init(false), // Flag name; disabled by default.
91 cl::desc("Use LoadSDNode and StoreSDNode instead of "
92 "AtomicSDNode for unordered atomic loads and "
93 "stores respectively."),
94 cl::Hidden); // Registered with the cl::Hidden modifier.
95
96/// Call this when the user attempts to do something unsupported, like
97/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
98/// report_fatal_error, so calling code should attempt to recover without
99/// crashing.
100static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, // DAG supplies the machine function and context; dl supplies the debug location.
101 const char *Msg) { // Msg is the text passed into the diagnostic.
102 MachineFunction &MF = DAG.getMachineFunction(); // Machine function associated with DAG.
103 DAG.getContext()->diagnose( // Hand the diagnostic to DAG's context instead of aborting.
104 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc())); // Built from MF's function, Msg, and dl's debug location.
105}
106
107X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
108 const X86Subtarget &STI)
109 : TargetLowering(TM), Subtarget(STI) {
110 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
111 X86ScalarSSEf64 = Subtarget.hasSSE2();
112 X86ScalarSSEf32 = Subtarget.hasSSE1();
113 X86ScalarSSEf16 = Subtarget.hasFP16();
114 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
115
116 // Set up the TargetLowering object.
117
118 // X86 is weird. It always uses i8 for shift amounts and setcc results.
119 setBooleanContents(ZeroOrOneBooleanContent);
120 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
121 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
122
123 // For 64-bit, since we have so many registers, use the ILP scheduler.
124 // For 32-bit, use the register pressure specific scheduling.
125 // For Atom, always use ILP scheduling.
126 if (Subtarget.isAtom())
127 setSchedulingPreference(Sched::ILP);
128 else if (Subtarget.is64Bit())
129 setSchedulingPreference(Sched::ILP);
130 else
131 setSchedulingPreference(Sched::RegPressure);
132 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
133 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
134
135 // Bypass expensive divides and use cheaper ones.
136 if (TM.getOptLevel() >= CodeGenOpt::Default) {
137 if (Subtarget.hasSlowDivide32())
138 addBypassSlowDiv(32, 8);
139 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
140 addBypassSlowDiv(64, 32);
141 }
142
143 // Setup Windows compiler runtime calls.
144 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
145 static const struct {
146 const RTLIB::Libcall Op;
147 const char * const Name;
148 const CallingConv::ID CC;
149 } LibraryCalls[] = {
150 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
151 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
152 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
153 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
154 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
155 };
156
157 for (const auto &LC : LibraryCalls) {
158 setLibcallName(LC.Op, LC.Name);
159 setLibcallCallingConv(LC.Op, LC.CC);
160 }
161 }
162
163 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
164 // MSVCRT doesn't have powi; fall back to pow
165 setLibcallName(RTLIB::POWI_F32, nullptr);
166 setLibcallName(RTLIB::POWI_F64, nullptr);
167 }
168
169 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
170 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
171 // FIXME: Should we be limiting the atomic size on other configs? Default is
172 // 1024.
173 if (!Subtarget.hasCmpxchg8b())
174 setMaxAtomicSizeInBitsSupported(32);
175
176 // Set up the register classes.
177 addRegisterClass(MVT::i8, &X86::GR8RegClass);
178 addRegisterClass(MVT::i16, &X86::GR16RegClass);
179 addRegisterClass(MVT::i32, &X86::GR32RegClass);
180 if (Subtarget.is64Bit())
181 addRegisterClass(MVT::i64, &X86::GR64RegClass);
182
183 for (MVT VT : MVT::integer_valuetypes())
184 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
185
186 // We don't accept any truncstore of integer registers.
187 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
188 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
189 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
190 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
191 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
192 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
193
194 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
195
196 // SETOEQ and SETUNE require checking two conditions.
197 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
198 setCondCodeAction(ISD::SETOEQ, VT, Expand);
199 setCondCodeAction(ISD::SETUNE, VT, Expand);
200 }
201
202 // Integer absolute.
203 if (Subtarget.hasCMov()) {
204 setOperationAction(ISD::ABS , MVT::i16 , Custom);
205 setOperationAction(ISD::ABS , MVT::i32 , Custom);
206 if (Subtarget.is64Bit())
207 setOperationAction(ISD::ABS , MVT::i64 , Custom);
208 }
209
210 // Signed saturation subtraction.
211 setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
212 setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
213 setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
214 if (Subtarget.is64Bit())
215 setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
216
217 // Funnel shifts.
218 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
219 // For slow shld targets we only lower for code size.
220 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
221
222 setOperationAction(ShiftOp , MVT::i8 , Custom);
223 setOperationAction(ShiftOp , MVT::i16 , Custom);
224 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
225 if (Subtarget.is64Bit())
226 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
227 }
228
229 if (!Subtarget.useSoftFloat()) {
230 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
231 // operation.
232 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
233 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
234 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
235 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
236 // We have an algorithm for SSE2, and we turn this into a 64-bit
237 // FILD or VCVTUSI2SS/SD for other targets.
238 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
239 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
240 // We have an algorithm for SSE2->double, and we turn this into a
241 // 64-bit FILD followed by conditional FADD for other targets.
242 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
243 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
244
245 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
246 // this operation.
247 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
248 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
249 // SSE has no i16 to fp conversion, only i32. We promote in the handler
250 // to allow f80 to use i16 and f64 to use i16 with sse1 only
251 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
252 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
253 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
254 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
255 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
256 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
257 // are Legal, f80 is custom lowered.
258 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
259 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
260
261 // Promote i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
262 // this operation.
263 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
264 // FIXME: This doesn't generate invalid exception when it should. PR44019.
265 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
266 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
267 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
268 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
269 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
270 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
271 // are Legal, f80 is custom lowered.
272 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
273 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
274
275 // Handle FP_TO_UINT by promoting the destination to a larger signed
276 // conversion.
277 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
278 // FIXME: This doesn't generate invalid exception when it should. PR44019.
279 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
280 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
281 // FIXME: This doesn't generate invalid exception when it should. PR44019.
282 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
283 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
284 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
285 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
286 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
287
288 setOperationAction(ISD::LRINT, MVT::f32, Custom);
289 setOperationAction(ISD::LRINT, MVT::f64, Custom);
290 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
291 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
292
293 if (!Subtarget.is64Bit()) {
294 setOperationAction(ISD::LRINT, MVT::i64, Custom);
295 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
296 }
297 }
298
299 if (Subtarget.hasSSE2()) {
300 // Custom lowering for saturating float to int conversions.
301 // We handle promotion to larger result types manually.
302 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
303 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
304 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
305 }
306 if (Subtarget.is64Bit()) {
307 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
308 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
309 }
310 }
311
312 // Handle address space casts between mixed sized pointers.
313 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
314 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
315
316 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
317 if (!X86ScalarSSEf64) {
318 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
319 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
320 if (Subtarget.is64Bit()) {
321 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
322 // Without SSE, i64->f64 goes through memory.
323 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
324 }
325 } else if (!Subtarget.is64Bit())
326 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
327
328 // Scalar integer divide and remainder are lowered to use operations that
329 // produce two results, to match the available instructions. This exposes
330 // the two-result form to trivial CSE, which is able to combine x/y and x%y
331 // into a single instruction.
332 //
333 // Scalar integer multiply-high is also lowered to use two-result
334 // operations, to match the available instructions. However, plain multiply
335 // (low) operations are left as Legal, as there are single-result
336 // instructions for this in x86. Using the two-result multiply instructions
337 // when both high and low results are needed must be arranged by dagcombine.
338 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
339 setOperationAction(ISD::MULHS, VT, Expand);
340 setOperationAction(ISD::MULHU, VT, Expand);
341 setOperationAction(ISD::SDIV, VT, Expand);
342 setOperationAction(ISD::UDIV, VT, Expand);
343 setOperationAction(ISD::SREM, VT, Expand);
344 setOperationAction(ISD::UREM, VT, Expand);
345 }
346
347 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
348 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
349 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
350 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
351 setOperationAction(ISD::BR_CC, VT, Expand);
352 setOperationAction(ISD::SELECT_CC, VT, Expand);
353 }
354 if (Subtarget.is64Bit())
355 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
356 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
357 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
358 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
359
360 setOperationAction(ISD::FREM , MVT::f32 , Expand);
361 setOperationAction(ISD::FREM , MVT::f64 , Expand);
362 setOperationAction(ISD::FREM , MVT::f80 , Expand);
363 setOperationAction(ISD::FREM , MVT::f128 , Expand);
364
365 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
366 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
367 setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
368 }
369
370 // Promote the i8 variants and force them on up to i32 which has a shorter
371 // encoding.
372 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
373 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
374
375 if (Subtarget.hasBMI()) {
376 // Promote the i16 zero undef variant and force it on up to i32 when tzcnt
377 // is enabled.
378 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32);
379 } else {
380 setOperationAction(ISD::CTTZ, MVT::i16, Custom);
381 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
382 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
383 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
384 if (Subtarget.is64Bit()) {
385 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
386 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
387 }
388 }
389
390 if (Subtarget.hasLZCNT()) {
391 // When promoting the i8 variants, force them to i32 for a shorter
392 // encoding.
393 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
394 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
395 } else {
396 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
397 if (VT == MVT::i64 && !Subtarget.is64Bit())
398 continue;
399 setOperationAction(ISD::CTLZ , VT, Custom);
400 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
401 }
402 }
403
404 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
405 ISD::STRICT_FP_TO_FP16}) {
406 // Special handling for half-precision floating point conversions.
407 // If we don't have F16C support, then lower half float conversions
408 // into library calls.
409 setOperationAction(
410 Op, MVT::f32,
411 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
412 // There's never any support for operations beyond MVT::f32.
413 setOperationAction(Op, MVT::f64, Expand);
414 setOperationAction(Op, MVT::f80, Expand);
415 setOperationAction(Op, MVT::f128, Expand);
416 }
417
418 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
419 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
420 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
421 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
422 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
423 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
424 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
425 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
426
427 setOperationAction(ISD::PARITY, MVT::i8, Custom);
428 if (Subtarget.hasPOPCNT()) {
429 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
430 } else {
431 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
432 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
433 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
434 if (Subtarget.is64Bit())
435 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
436 else
437 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
438
439 setOperationAction(ISD::PARITY, MVT::i16, Custom);
440 setOperationAction(ISD::PARITY, MVT::i32, Custom);
441 if (Subtarget.is64Bit())
442 setOperationAction(ISD::PARITY, MVT::i64, Custom);
443 }
444
445 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
446
447 if (!Subtarget.hasMOVBE())
448 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
449
450 // X86 wants to expand cmov itself.
451 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
452 setOperationAction(ISD::SELECT, VT, Custom);
453 setOperationAction(ISD::SETCC, VT, Custom);
454 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
455 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
456 }
457 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
458 if (VT == MVT::i64 && !Subtarget.is64Bit())
459 continue;
460 setOperationAction(ISD::SELECT, VT, Custom);
461 setOperationAction(ISD::SETCC, VT, Custom);
462 }
463
464 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
465 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
466 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
467
468 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
469 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
470 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
471 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
472 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
473 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
474 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
475 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
476
477 // Darwin ABI issue.
478 for (auto VT : { MVT::i32, MVT::i64 }) {
479 if (VT == MVT::i64 && !Subtarget.is64Bit())
480 continue;
481 setOperationAction(ISD::ConstantPool , VT, Custom);
482 setOperationAction(ISD::JumpTable , VT, Custom);
483 setOperationAction(ISD::GlobalAddress , VT, Custom);
484 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
485 setOperationAction(ISD::ExternalSymbol , VT, Custom);
486 setOperationAction(ISD::BlockAddress , VT, Custom);
487 }
488
489 // 64-bit shl, sra, srl (iff 32-bit x86)
490 for (auto VT : { MVT::i32, MVT::i64 }) {
491 if (VT == MVT::i64 && !Subtarget.is64Bit())
492 continue;
493 setOperationAction(ISD::SHL_PARTS, VT, Custom);
494 setOperationAction(ISD::SRA_PARTS, VT, Custom);
495 setOperationAction(ISD::SRL_PARTS, VT, Custom);
496 }
497
498 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
499 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
500
501 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
502
503 // Expand certain atomics
504 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
505 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
506 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
507 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
508 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
509 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
510 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
511 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
512 }
513
514 if (!Subtarget.is64Bit())
515 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
516
517 if (Subtarget.hasCmpxchg16b()) {
518 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
519 }
520
521 // FIXME - use subtarget debug flags
522 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
523 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
524 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
525 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
526 }
527
528 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
529 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
530
531 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
532 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
533
534 setOperationAction(ISD::TRAP, MVT::Other, Legal);
535 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
536 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
537
538 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
539 setOperationAction(ISD::VASTART , MVT::Other, Custom);
540 setOperationAction(ISD::VAEND , MVT::Other, Expand);
541 bool Is64Bit = Subtarget.is64Bit();
542 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
543 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
544
545 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
546 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
547
548 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
549
550 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
551 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
552 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
553
554 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
555 // f32 and f64 use SSE.
556 // Set up the FP register classes.
557 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
558 : &X86::FR32RegClass);
559 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
560 : &X86::FR64RegClass);
561
562 // Disable f32->f64 extload as we can only generate this in one instruction
563 // under optsize. So its easier to pattern match (fpext (load)) for that
564 // case instead of needing to emit 2 instructions for extload in the
565 // non-optsize case.
566 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
567
568 for (auto VT : { MVT::f32, MVT::f64 }) {
569 // Use ANDPD to simulate FABS.
570 setOperationAction(ISD::FABS, VT, Custom);
571
572 // Use XORP to simulate FNEG.
573 setOperationAction(ISD::FNEG, VT, Custom);
574
575 // Use ANDPD and ORPD to simulate FCOPYSIGN.
576 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
577
578 // These might be better off as horizontal vector ops.
579 setOperationAction(ISD::FADD, VT, Custom);
580 setOperationAction(ISD::FSUB, VT, Custom);
581
582 // We don't support sin/cos/fmod
583 setOperationAction(ISD::FSIN , VT, Expand);
584 setOperationAction(ISD::FCOS , VT, Expand);
585 setOperationAction(ISD::FSINCOS, VT, Expand);
586 }
587
588 // Lower this to MOVMSK plus an AND.
589 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
590 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
591
592 } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
593 (UseX87 || Is64Bit)) {
594 // Use SSE for f32, x87 for f64.
595 // Set up the FP register classes.
596 addRegisterClass(MVT::f32, &X86::FR32RegClass);
597 if (UseX87)
598 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
599
600 // Use ANDPS to simulate FABS.
601 setOperationAction(ISD::FABS , MVT::f32, Custom);
602
603 // Use XORP to simulate FNEG.
604 setOperationAction(ISD::FNEG , MVT::f32, Custom);
605
606 if (UseX87)
607 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
608
609 // Use ANDPS and ORPS to simulate FCOPYSIGN.
610 if (UseX87)
611 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
612 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
613
614 // We don't support sin/cos/fmod
615 setOperationAction(ISD::FSIN , MVT::f32, Expand);
616 setOperationAction(ISD::FCOS , MVT::f32, Expand);
617 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
618
619 if (UseX87) {
620 // Always expand sin/cos functions even though x87 has an instruction.
621 setOperationAction(ISD::FSIN, MVT::f64, Expand);
622 setOperationAction(ISD::FCOS, MVT::f64, Expand);
623 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
624 }
625 } else if (UseX87) {
626 // f32 and f64 in x87.
627 // Set up the FP register classes.
628 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
629 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
630
631 for (auto VT : { MVT::f32, MVT::f64 }) {
632 setOperationAction(ISD::UNDEF, VT, Expand);
633 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
634
635 // Always expand sin/cos functions even though x87 has an instruction.
636 setOperationAction(ISD::FSIN , VT, Expand);
637 setOperationAction(ISD::FCOS , VT, Expand);
638 setOperationAction(ISD::FSINCOS, VT, Expand);
639 }
640 }
641
642 // Expand FP32 immediates into loads from the stack, save special cases.
643 if (isTypeLegal(MVT::f32)) {
644 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
645 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
646 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
647 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
648 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
649 } else // SSE immediates.
650 addLegalFPImmediate(APFloat(+0.0f)); // xorps
651 }
652 // Expand FP64 immediates into loads from the stack, save special cases.
653 if (isTypeLegal(MVT::f64)) {
654 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
655 addLegalFPImmediate(APFloat(+0.0)); // FLD0
656 addLegalFPImmediate(APFloat(+1.0)); // FLD1
657 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
658 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
659 } else // SSE immediates.
660 addLegalFPImmediate(APFloat(+0.0)); // xorpd
661 }
662 // Handle constrained floating-point operations of scalar.
663 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
664 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
665 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
666 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
667 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
668 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
669 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
670 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
671 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
672 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
673 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
674 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
675 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
676
677 // We don't support FMA.
678 setOperationAction(ISD::FMA, MVT::f64, Expand);
679 setOperationAction(ISD::FMA, MVT::f32, Expand);
680
681 // f80 always uses X87.
682 if (UseX87) {
683 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
684 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
685 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
686 {
687 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
688 addLegalFPImmediate(TmpFlt); // FLD0
689 TmpFlt.changeSign();
690 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
691
692 bool ignored;
693 APFloat TmpFlt2(+1.0);
694 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
695 &ignored);
696 addLegalFPImmediate(TmpFlt2); // FLD1
697 TmpFlt2.changeSign();
698 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
699 }
700
701 // Always expand sin/cos functions even though x87 has an instruction.
702 setOperationAction(ISD::FSIN , MVT::f80, Expand);
703 setOperationAction(ISD::FCOS , MVT::f80, Expand);
704 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
705
706 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
707 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
708 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
709 setOperationAction(ISD::FRINT, MVT::f80, Expand);
710 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
711 setOperationAction(ISD::FMA, MVT::f80, Expand);
712 setOperationAction(ISD::LROUND, MVT::f80, Expand);
713 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
714 setOperationAction(ISD::LRINT, MVT::f80, Custom);
715 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
716
717 // Handle constrained floating-point operations of scalar.
718 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
719 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
720 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
721 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
722 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
723 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
724 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
725 // as Custom.
726 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
727 }
728
729 // f128 uses xmm registers, but most operations require libcalls.
730 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
731 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
732 : &X86::VR128RegClass);
733
734 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
735
736 setOperationAction(ISD::FADD, MVT::f128, LibCall);
737 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
738 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
739 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
740 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
741 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
742 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
743 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
744 setOperationAction(ISD::FMA, MVT::f128, LibCall);
745 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
746
747 setOperationAction(ISD::FABS, MVT::f128, Custom);
748 setOperationAction(ISD::FNEG, MVT::f128, Custom);
749 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
750
751 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
752 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
753 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
754 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
755 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
756 // No STRICT_FSINCOS
757 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
758 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
759
760 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
761 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
762 // We need to custom handle any FP_ROUND with an f128 input, but
763 // LegalizeDAG uses the result type to know when to run a custom handler.
764 // So we have to list all legal floating point result types here.
765 if (isTypeLegal(MVT::f32)) {
766 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
767 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
768 }
769 if (isTypeLegal(MVT::f64)) {
770 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
771 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
772 }
773 if (isTypeLegal(MVT::f80)) {
774 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
775 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
776 }
777
778 setOperationAction(ISD::SETCC, MVT::f128, Custom);
779
780 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
781 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
782 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
783 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
784 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
785 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
786 }
787
788 // Always use a library call for pow.
789 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
790 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
791 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
792 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
793
794 setOperationAction(ISD::FLOG, MVT::f80, Expand);
795 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
796 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
797 setOperationAction(ISD::FEXP, MVT::f80, Expand);
798 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
799 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
800 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
801
802 // Some FP actions are always expanded for vector types.
803 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
804 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
805 setOperationAction(ISD::FSIN, VT, Expand);
806 setOperationAction(ISD::FSINCOS, VT, Expand);
807 setOperationAction(ISD::FCOS, VT, Expand);
808 setOperationAction(ISD::FREM, VT, Expand);
809 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
810 setOperationAction(ISD::FPOW, VT, Expand);
811 setOperationAction(ISD::FLOG, VT, Expand);
812 setOperationAction(ISD::FLOG2, VT, Expand);
813 setOperationAction(ISD::FLOG10, VT, Expand);
814 setOperationAction(ISD::FEXP, VT, Expand);
815 setOperationAction(ISD::FEXP2, VT, Expand);
816 }
817
818 // First set operation action for all vector types to either promote
819 // (for widening) or expand (for scalarization). Then we will selectively
820 // turn on ones that can be effectively codegen'd.
821 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
822 setOperationAction(ISD::SDIV, VT, Expand);
823 setOperationAction(ISD::UDIV, VT, Expand);
824 setOperationAction(ISD::SREM, VT, Expand);
825 setOperationAction(ISD::UREM, VT, Expand);
826 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
827 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
828 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
829 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
830 setOperationAction(ISD::FMA, VT, Expand);
831 setOperationAction(ISD::FFLOOR, VT, Expand);
832 setOperationAction(ISD::FCEIL, VT, Expand);
833 setOperationAction(ISD::FTRUNC, VT, Expand);
834 setOperationAction(ISD::FRINT, VT, Expand);
835 setOperationAction(ISD::FNEARBYINT, VT, Expand);
836 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
837 setOperationAction(ISD::MULHS, VT, Expand);
838 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
839 setOperationAction(ISD::MULHU, VT, Expand);
840 setOperationAction(ISD::SDIVREM, VT, Expand);
841 setOperationAction(ISD::UDIVREM, VT, Expand);
842 setOperationAction(ISD::CTPOP, VT, Expand);
843 setOperationAction(ISD::CTTZ, VT, Expand);
844 setOperationAction(ISD::CTLZ, VT, Expand);
845 setOperationAction(ISD::ROTL, VT, Expand);
846 setOperationAction(ISD::ROTR, VT, Expand);
847 setOperationAction(ISD::BSWAP, VT, Expand);
848 setOperationAction(ISD::SETCC, VT, Expand);
849 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
850 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
851 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
852 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
853 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
854 setOperationAction(ISD::TRUNCATE, VT, Expand);
855 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
856 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
857 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
858 setOperationAction(ISD::SELECT_CC, VT, Expand);
859 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
860 setTruncStoreAction(InnerVT, VT, Expand);
861
862 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
863 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
864
865 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
866 // types, we have to deal with them whether we ask for Expansion or not.
867 // Setting Expand causes its own optimisation problems though, so leave
868 // them legal.
869 if (VT.getVectorElementType() == MVT::i1)
870 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
871
872 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
873 // split/scalarized right now.
874 if (VT.getVectorElementType() == MVT::f16)
875 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
876 }
877 }
878
879 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
880 // with -msoft-float, disable use of MMX as well.
881 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
882 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
883 // No operations on x86mmx supported, everything uses intrinsics.
884 }
885
886 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
887 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
888 : &X86::VR128RegClass);
889
890 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
891 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
892 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
893 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
894 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
895 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
896 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
897 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
898
899 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
900 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
901
902 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
903 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
904 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
905 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
906 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
907 }
908
909 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
910 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
911 : &X86::VR128RegClass);
912
913 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
914 // registers cannot be used even for integer operations.
915 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
916 : &X86::VR128RegClass);
917 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
918 : &X86::VR128RegClass);
919 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
920 : &X86::VR128RegClass);
921 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
922 : &X86::VR128RegClass);
923
924 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
925 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
926 setOperationAction(ISD::SDIV, VT, Custom);
927 setOperationAction(ISD::SREM, VT, Custom);
928 setOperationAction(ISD::UDIV, VT, Custom);
929 setOperationAction(ISD::UREM, VT, Custom);
930 }
931
932 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
933 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
934 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
935
936 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
937 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
938 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
939 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
940 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
941 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
942 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
943 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
944 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
945 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
946
947 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
948 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
949
950 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
951 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
952 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
953
954 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
955 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
956 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
957 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
958 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
959 }
960
961 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
962 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
963 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
964 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
965 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
966 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
967 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
968 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
969 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
970 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
971
972 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
973 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
974 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
975 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
976
977 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
978 setOperationAction(ISD::SETCC, VT, Custom);
979 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
980 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
981 setOperationAction(ISD::CTPOP, VT, Custom);
982 setOperationAction(ISD::ABS, VT, Custom);
983
984 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
985 // setcc all the way to isel and prefer SETGT in some isel patterns.
986 setCondCodeAction(ISD::SETLT, VT, Custom);
987 setCondCodeAction(ISD::SETLE, VT, Custom);
988 }
989
990 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
991 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
992 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
993 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
994 setOperationAction(ISD::VSELECT, VT, Custom);
995 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
996 }
997
998 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
999 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1000 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1001 setOperationAction(ISD::VSELECT, VT, Custom);
1002
1003 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1004 continue;
1005
1006 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1007 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1008 }
1009
1010 // Custom lower selects for v2f64 and the 128-bit integer vector types.
1011 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1012 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1013 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
1014 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
1015 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
1016
1017 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1018 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
1019 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1020 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1021 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
1022 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1023
1024 // Custom legalize these to avoid over promotion or custom promotion.
1025 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1026 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1027 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1028 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1029 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1030 }
1031
1032 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1033 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
1034 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1035 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1036
1037 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1038 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1039
1040 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1041 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1042
1043 // Fast v2f32 SINT_TO_FP/UINT_TO_FP( v2i32 ) custom conversion.
1044 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1045 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1046 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1047 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1048
1049 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1050 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1051 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1052 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1053
1054 // We want to legalize this to an f64 load rather than an i64 load on
1055 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1056 // store.
1057 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1058 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1059 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1060 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1061 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1062 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1063
1064 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1065 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1066 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1067 if (!Subtarget.hasAVX512())
1068 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1069
1070 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1071 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1072 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1073
1074 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1075
1076 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1077 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1078 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1079 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1080 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1081 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1082
1083 // In the customized shift lowering, the legal v4i32/v2i64 cases
1084 // in AVX2 will be recognized.
1085 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1086 setOperationAction(ISD::SRL, VT, Custom);
1087 setOperationAction(ISD::SHL, VT, Custom);
1088 setOperationAction(ISD::SRA, VT, Custom);
1089 }
1090
1091 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1092 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1093
1094 // With 512-bit registers or AVX512VL+BW, expanding (and promoting the
1095 // shifts) is better.
1096 if (!Subtarget.useAVX512Regs() &&
1097 !(Subtarget.hasBWI() && Subtarget.hasVLX()))
1098 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1099
1100 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1101 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1102 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1103 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1104 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1105 }
1106
1107 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1108 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1109 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1110 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1111 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1112 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1113 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1114 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1115 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1116
1117 // These might be better off as horizontal vector ops.
1118 setOperationAction(ISD::ADD, MVT::i16, Custom);
1119 setOperationAction(ISD::ADD, MVT::i32, Custom);
1120 setOperationAction(ISD::SUB, MVT::i16, Custom);
1121 setOperationAction(ISD::SUB, MVT::i32, Custom);
1122 }
1123
1124 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1125 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1126 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1127 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1128 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1129 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1130 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1131 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1132 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1133 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1134 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1135 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1136 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1137 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1138
1139 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1140 }
1141
1142 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1143 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1144 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1145 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1146 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1147 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1148 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1149 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1150
1151 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1152 setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
1153 setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
1154
1155 // FIXME: Do we need to handle scalar-to-vector here?
1156 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1157
1158 // We directly match byte blends in the backend as they match the VSELECT
1159 // condition form.
1160 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1161
1162 // SSE41 brings specific instructions for doing vector sign extend even in
1163 // cases where we don't have SRA.
1164 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1165 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1166 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1167 }
1168
1169 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1170 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1171 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1172 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1173 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1174 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1175 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1176 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1177 }
1178
1179 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1180 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1181 // do the pre and post work in the vector domain.
1182 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1183 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1184 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1185 // so that DAG combine doesn't try to turn it into uint_to_fp.
1186 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1187 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1188 }
1189 }
1190
1191 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1192 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1193 }
1194
1195 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1196 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1197 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1198 setOperationAction(ISD::ROTL, VT, Custom);
1199
1200 // XOP can efficiently perform BITREVERSE with VPPERM.
1201 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1202 setOperationAction(ISD::BITREVERSE, VT, Custom);
1203
1204 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1205 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1206 setOperationAction(ISD::BITREVERSE, VT, Custom);
1207 }
1208
1209 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1210 bool HasInt256 = Subtarget.hasInt256();
1211
1212 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1213 : &X86::VR256RegClass);
1214 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1215 : &X86::VR256RegClass);
1216 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1217 : &X86::VR256RegClass);
1218 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1219 : &X86::VR256RegClass);
1220 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1221 : &X86::VR256RegClass);
1222 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1223 : &X86::VR256RegClass);
1224
1225 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1226 setOperationAction(ISD::FFLOOR, VT, Legal);
1227 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1228 setOperationAction(ISD::FCEIL, VT, Legal);
1229 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1230 setOperationAction(ISD::FTRUNC, VT, Legal);
1231 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1232 setOperationAction(ISD::FRINT, VT, Legal);
1233 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1234 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1235 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1236 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1237 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1238
1239 setOperationAction(ISD::FROUND, VT, Custom);
1240
1241 setOperationAction(ISD::FNEG, VT, Custom);
1242 setOperationAction(ISD::FABS, VT, Custom);
1243 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1244 }
1245
1246 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1247 // even though v8i16 is a legal type.
1248 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1249 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1250 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1251 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1252 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1253 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
1254 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
1255
1256 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1257 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1258
1259 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1260 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1261 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1262 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1263 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1264 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1265 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1266 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1267 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1268 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1269 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1270 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1271
1272 if (!Subtarget.hasAVX512())
1273 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1274
1275 // In the customized shift lowering, the legal v8i32/v4i64 cases
1276 // in AVX2 will be recognized.
1277 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1278 setOperationAction(ISD::SRL, VT, Custom);
1279 setOperationAction(ISD::SHL, VT, Custom);
1280 setOperationAction(ISD::SRA, VT, Custom);
1281 }
1282
1283 // These types need custom splitting if their input is a 128-bit vector.
1284 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1285 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1286 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1287 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1288
1289 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1290 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1291
1292 // With BWI, expanding (and promoting the shifts) is better.
1293 if (!Subtarget.useBWIRegs())
1294 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1295
1296 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1297 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1298 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1299 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1300 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1301 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1302
1303 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1304 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1305 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1306 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1307 }
1308
1309 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1310 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1311 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1312 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1313
1314 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1315 setOperationAction(ISD::SETCC, VT, Custom);
1316 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1317 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1318 setOperationAction(ISD::CTPOP, VT, Custom);
1319 setOperationAction(ISD::CTLZ, VT, Custom);
1320
1321 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1322 // setcc all the way to isel and prefer SETGT in some isel patterns.
1323 setCondCodeAction(ISD::SETLT, VT, Custom);
1324 setCondCodeAction(ISD::SETLE, VT, Custom);
1325 }
1326
1327 if (Subtarget.hasAnyFMA()) {
1328 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1329 MVT::v2f64, MVT::v4f64 }) {
1330 setOperationAction(ISD::FMA, VT, Legal);
1331 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1332 }
1333 }
1334
1335 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1336 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1337 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1338 }
1339
1340 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1341 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1342 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1343 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1344
1345 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1346 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1347 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1348 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1349 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1350 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1351
1352 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1353 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1354
1355 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1356 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1357 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1358 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1359 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1360
1361 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1362 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1363 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1364 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1365 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1366 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1367 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1368 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1369 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1370 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1371 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1372 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1373
1374 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1375 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1376 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1377 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1378 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1379 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1380 }
1381
1382 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1383 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1384 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1385 }
1386
1387 if (HasInt256) {
1388 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1389 // when we have a 256bit-wide blend with immediate.
1390 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1391 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1392
1393 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1394 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1395 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1396 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1397 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1398 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1399 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1400 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1401 }
1402 }
1403
1404 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1405 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1406 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1407 setOperationAction(ISD::MSTORE, VT, Legal);
1408 }
1409
1410 // Extract subvector is special because the value type
1411 // (result) is 128-bit but the source is 256-bit wide.
1412 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1413 MVT::v4f32, MVT::v2f64 }) {
1414 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1415 }
1416
1417 // Custom lower several nodes for 256-bit types.
1418 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1419 MVT::v8f32, MVT::v4f64 }) {
1420 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1421 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1422 setOperationAction(ISD::VSELECT, VT, Custom);
1423 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1424 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1425 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1426 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1427 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1428 setOperationAction(ISD::STORE, VT, Custom);
1429 }
1430
1431 if (HasInt256) {
1432 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1433
1434 // Custom legalize 2x32 to get a little better code.
1435 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1436 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1437
1438 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1439 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1440 setOperationAction(ISD::MGATHER, VT, Custom);
1441 }
1442 }
1443
1444 // This block controls legalization of the mask vector sizes that are
1445 // available with AVX512. 512-bit vectors are in a separate block controlled
1446 // by useAVX512Regs.
1447 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1448 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1449 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1450 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1451 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1452 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1453
1454 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1455 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1456 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1457
1458 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1459 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1460 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1461 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1462 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1463 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1464 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1465 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1466 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1467 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1468 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1469 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1470
1471 // There is no byte sized k-register load or store without AVX512DQ.
1472 if (!Subtarget.hasDQI()) {
1473 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1474 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1475 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1476 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1477
1478 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1479 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1480 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1481 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1482 }
1483
1484 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1485 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1486 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1487 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1488 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1489 }
1490
1491 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1492 setOperationAction(ISD::VSELECT, VT, Expand);
1493
1494 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1495 setOperationAction(ISD::SETCC, VT, Custom);
1496 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1497 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1498 setOperationAction(ISD::SELECT, VT, Custom);
1499 setOperationAction(ISD::TRUNCATE, VT, Custom);
1500
1501 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1502 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1503 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1504 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1505 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1506 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1507 }
1508
1509 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1510 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1511 }
1512
1513 // This block controls legalization for 512-bit operations with 32/64 bit
1514 // elements. 512-bits can be disabled based on prefer-vector-width and
1515 // required-vector-width function attributes.
1516 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1517 bool HasBWI = Subtarget.hasBWI();
1518
1519 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1520 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1521 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1522 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1523 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1524 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1525
1526 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1527 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1528 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1529 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1530 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1531 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1532 if (HasBWI)
1533 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1534 }
1535
1536 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1537 setOperationAction(ISD::FNEG, VT, Custom);
1538 setOperationAction(ISD::FABS, VT, Custom);
1539 setOperationAction(ISD::FMA, VT, Legal);
1540 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1541 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1542 }
1543
1544 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1545 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1546 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1547 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1548 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1549 }
1550 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1551 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1552 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1553 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1554 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1555 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1556 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1557 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1558
1559 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1560 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1561 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1562 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1563 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1564 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1565 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1566 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1567 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1568 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1569 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1570 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1571
1572 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1573 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1574 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1575 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1576 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1577 if (HasBWI)
1578 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1579
1580 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1581 // to 512-bit rather than use the AVX2 instructions so that we can use
1582 // k-masks.
1583 if (!Subtarget.hasVLX()) {
1584 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1585 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1586 setOperationAction(ISD::MLOAD, VT, Custom);
1587 setOperationAction(ISD::MSTORE, VT, Custom);
1588 }
1589 }
1590
1591 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1592 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1593 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1594 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1595 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1596 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1597 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1598 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1599 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1600 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1601 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1602 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1603 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1604
1605 if (HasBWI) {
1606 // Extends from v64i1 masks to 512-bit vectors.
1607 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1608 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1609 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1610 }
1611
1612 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1613 setOperationAction(ISD::FFLOOR, VT, Legal);
1614 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1615 setOperationAction(ISD::FCEIL, VT, Legal);
1616 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1617 setOperationAction(ISD::FTRUNC, VT, Legal);
1618 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1619 setOperationAction(ISD::FRINT, VT, Legal);
1620 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1621 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1622 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1623 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1624 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1625
1626 setOperationAction(ISD::FROUND, VT, Custom);
1627 }
1628
1629 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1630 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1631 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1632 }
1633
1634 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1635 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1636 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1637 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1638
1639 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1640 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1641 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1642 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1643
1644 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1645 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1646 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1647 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1648 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1649 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1650
1651 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1652 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1653
1654 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1655
1656 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1657 setOperationAction(ISD::SRL, VT, Custom);
1658 setOperationAction(ISD::SHL, VT, Custom);
1659 setOperationAction(ISD::SRA, VT, Custom);
1660 setOperationAction(ISD::SETCC, VT, Custom);
1661
1662 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1663 // setcc all the way to isel and prefer SETGT in some isel patterns.
1664 setCondCodeAction(ISD::SETLT, VT, Custom);
1665 setCondCodeAction(ISD::SETLE, VT, Custom);
1666 }
1667 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1668 setOperationAction(ISD::SMAX, VT, Legal);
1669 setOperationAction(ISD::UMAX, VT, Legal);
1670 setOperationAction(ISD::SMIN, VT, Legal);
1671 setOperationAction(ISD::UMIN, VT, Legal);
1672 setOperationAction(ISD::ABS, VT, Legal);
1673 setOperationAction(ISD::CTPOP, VT, Custom);
1674 setOperationAction(ISD::ROTL, VT, Custom);
1675 setOperationAction(ISD::ROTR, VT, Custom);
1676 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1677 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1678 }
1679
1680 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1681 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1682 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1683 setOperationAction(ISD::CTLZ, VT, Custom);
1684 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1685 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1686 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1687 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1688 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1689 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1690 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1691 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1692 }
1693
1694 if (Subtarget.hasDQI()) {
1695 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1696 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1697 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1698 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1699 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1700 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1701 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1702 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1703
1704 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1705 }
1706
1707 if (Subtarget.hasCDI()) {
1708 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1709 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1710 setOperationAction(ISD::CTLZ, VT, Legal);
1711 }
1712 } // Subtarget.hasCDI()
1713
1714 if (Subtarget.hasVPOPCNTDQ()) {
1715 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1716 setOperationAction(ISD::CTPOP, VT, Legal);
1717 }
1718
1719 // Extract subvector is special because the value type
1720 // (result) is 256-bit but the source is 512-bit wide.
1721 // 128-bit was made Legal under AVX1.
1722 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1723 MVT::v8f32, MVT::v4f64 })
1724 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1725
1726 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1727 MVT::v16f32, MVT::v8f64 }) {
1728 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1729 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1730 setOperationAction(ISD::SELECT, VT, Custom);
1731 setOperationAction(ISD::VSELECT, VT, Custom);
1732 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1733 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1734 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1735 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1736 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1737 }
1738
1739 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1740 setOperationAction(ISD::MLOAD, VT, Legal);
1741 setOperationAction(ISD::MSTORE, VT, Legal);
1742 setOperationAction(ISD::MGATHER, VT, Custom);
1743 setOperationAction(ISD::MSCATTER, VT, Custom);
1744 }
1745 if (HasBWI) {
1746 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1747 setOperationAction(ISD::MLOAD, VT, Legal);
1748 setOperationAction(ISD::MSTORE, VT, Legal);
1749 }
1750 } else {
1751 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1752 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1753 }
1754
1755 if (Subtarget.hasVBMI2()) {
1756 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1757 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1758 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1759 setOperationAction(ISD::FSHL, VT, Custom);
1760 setOperationAction(ISD::FSHR, VT, Custom);
1761 }
1762
1763 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1764 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1765 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1766 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1767 }
1768 }// useAVX512Regs
1769
1770 // This block controls legalization for operations that don't have
1771 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1772 // narrower widths.
1773 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1774 // These operations are handled on non-VLX by artificially widening in
1775 // isel patterns.
1776
1777 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1778 Subtarget.hasVLX() ? Legal : Custom);
1779 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1780 Subtarget.hasVLX() ? Legal : Custom);
1781 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1782 Subtarget.hasVLX() ? Legal : Custom);
1783 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1784 Subtarget.hasVLX() ? Legal : Custom);
1785 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1786 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1787 Subtarget.hasVLX() ? Legal : Custom);
1788 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1789 Subtarget.hasVLX() ? Legal : Custom);
1790 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1791 Subtarget.hasVLX() ? Legal : Custom);
1792 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1793 Subtarget.hasVLX() ? Legal : Custom);
1794
1795 if (Subtarget.hasDQI()) {
1796 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1797 // v2f32 UINT_TO_FP is already custom under SSE2.
1798 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1800, __extension__ __PRETTY_FUNCTION__))
1799 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1800, __extension__ __PRETTY_FUNCTION__))
1800 "Unexpected operation action!")(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP
, MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1800, __extension__ __PRETTY_FUNCTION__))
;
1801 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1802 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1803 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1804 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1805 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1806 }
1807
1808 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1809 setOperationAction(ISD::SMAX, VT, Legal);
1810 setOperationAction(ISD::UMAX, VT, Legal);
1811 setOperationAction(ISD::SMIN, VT, Legal);
1812 setOperationAction(ISD::UMIN, VT, Legal);
1813 setOperationAction(ISD::ABS, VT, Legal);
1814 }
1815
1816 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1817 setOperationAction(ISD::ROTL, VT, Custom);
1818 setOperationAction(ISD::ROTR, VT, Custom);
1819 }
1820
1821 // Custom legalize 2x32 to get a little better code.
1822 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1823 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1824
1825 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1826 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1827 setOperationAction(ISD::MSCATTER, VT, Custom);
1828
1829 if (Subtarget.hasDQI()) {
1830 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1831 setOperationAction(ISD::SINT_TO_FP, VT,
1832 Subtarget.hasVLX() ? Legal : Custom);
1833 setOperationAction(ISD::UINT_TO_FP, VT,
1834 Subtarget.hasVLX() ? Legal : Custom);
1835 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1836 Subtarget.hasVLX() ? Legal : Custom);
1837 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1838 Subtarget.hasVLX() ? Legal : Custom);
1839 setOperationAction(ISD::FP_TO_SINT, VT,
1840 Subtarget.hasVLX() ? Legal : Custom);
1841 setOperationAction(ISD::FP_TO_UINT, VT,
1842 Subtarget.hasVLX() ? Legal : Custom);
1843 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1844 Subtarget.hasVLX() ? Legal : Custom);
1845 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1846 Subtarget.hasVLX() ? Legal : Custom);
1847 setOperationAction(ISD::MUL, VT, Legal);
1848 }
1849 }
1850
1851 if (Subtarget.hasCDI()) {
1852 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1853 setOperationAction(ISD::CTLZ, VT, Legal);
1854 }
1855 } // Subtarget.hasCDI()
1856
1857 if (Subtarget.hasVPOPCNTDQ()) {
1858 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1859 setOperationAction(ISD::CTPOP, VT, Legal);
1860 }
1861 }
1862
1863 // This block controls legalization of v32i1/v64i1 which are available with
1864 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1865 // useBWIRegs.
1866 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1867 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1868 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1869
1870 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1871 setOperationAction(ISD::VSELECT, VT, Expand);
1872 setOperationAction(ISD::TRUNCATE, VT, Custom);
1873 setOperationAction(ISD::SETCC, VT, Custom);
1874 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1875 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1876 setOperationAction(ISD::SELECT, VT, Custom);
1877 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1878 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1879 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1880 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1881 }
1882
1883 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1884 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1885
1886 // Extends from v32i1 masks to 256-bit vectors.
1887 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1888 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1889 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1890
1891 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1892 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1893 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1894 }
1895
1896 // These operations are handled on non-VLX by artificially widening in
1897 // isel patterns.
1898 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1899
1900 if (Subtarget.hasBITALG()) {
1901 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1902 setOperationAction(ISD::CTPOP, VT, Legal);
1903 }
1904 }
1905
1906 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
1907 auto setGroup = [&] (MVT VT) { // Applies one shared set of operation actions to VT; invoked below for f16, v32f16, v8f16 and v16f16.
1908 setOperationAction(ISD::FADD, VT, Legal); // Arithmetic and square root, each paired with its STRICT_ variant, are marked Legal.
1909 setOperationAction(ISD::STRICT_FADD, VT, Legal);
1910 setOperationAction(ISD::FSUB, VT, Legal);
1911 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
1912 setOperationAction(ISD::FMUL, VT, Legal);
1913 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
1914 setOperationAction(ISD::FDIV, VT, Legal);
1915 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
1916 setOperationAction(ISD::FSQRT, VT, Legal);
1917 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
1918 // Rounding operations (floor/ceil/trunc/rint/nearbyint) and their STRICT_ variants are marked Legal.
1919 setOperationAction(ISD::FFLOOR, VT, Legal);
1920 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1921 setOperationAction(ISD::FCEIL, VT, Legal);
1922 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1923 setOperationAction(ISD::FTRUNC, VT, Legal);
1924 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1925 setOperationAction(ISD::FRINT, VT, Legal);
1926 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1927 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1928 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1929 // Plain loads and stores of VT are marked Legal.
1930 setOperationAction(ISD::LOAD, VT, Legal);
1931 setOperationAction(ISD::STORE, VT, Legal);
1932 // FMA and VSELECT are Legal; BUILD_VECTOR and SELECT are routed to custom lowering.
1933 setOperationAction(ISD::FMA, VT, Legal);
1934 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1935 setOperationAction(ISD::VSELECT, VT, Legal);
1936 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1937 setOperationAction(ISD::SELECT, VT, Custom);
1938 // Sign manipulation, element extraction and shuffles are routed to custom lowering.
1939 setOperationAction(ISD::FNEG, VT, Custom);
1940 setOperationAction(ISD::FABS, VT, Custom);
1941 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1942 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1943 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1944 };
1945
1946 // AVX512_FP16 scalar operations
1947 setGroup(MVT::f16);
1948 addRegisterClass(MVT::f16, &X86::FR16XRegClass);
1949 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
1950 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
1951 setOperationAction(ISD::SETCC, MVT::f16, Custom);
1952 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
1953 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
1954 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
1955 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
1956 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
1957 if (isTypeLegal(MVT::f80)) {
1958 setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
1959 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
1960 }
1961
1962 setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
1963 setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
1964
1965 if (Subtarget.useAVX512Regs()) {
1966 setGroup(MVT::v32f16);
1967 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1968 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
1969 setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
1970 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
1971 setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
1972 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
1973 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
1974 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
1975 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
1976
1977 setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
1978 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
1979 setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
1980 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
1981 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
1982 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
1983 MVT::v32i16);
1984 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
1985 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
1986 MVT::v32i16);
1987 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
1988 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
1989 MVT::v32i16);
1990 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
1991 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
1992 MVT::v32i16);
1993
1994 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
1995 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
1996 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
1997
1998 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
1999 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2000
2001 setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
2002 setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
2003 }
2004
2005 if (Subtarget.hasVLX()) {
2006 addRegisterClass(MVT::v8f16, &X86::VR128XRegClass);
2007 addRegisterClass(MVT::v16f16, &X86::VR256XRegClass);
2008 setGroup(MVT::v8f16);
2009 setGroup(MVT::v16f16);
2010
2011 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
2012 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
2013 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
2014 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
2015 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
2016 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
2017 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
2018 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
2019 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
2020 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
2021
2022 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
2023 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
2024 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
2025 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
2026 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
2027 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
2028
2029 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2030 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
2031 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
2032
2033 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
2034 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
2035 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
2036
2037 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2038 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2039 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2040 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2041
2042 // Need to custom widen these to prevent scalarization.
2043 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2044 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2045 }
2046
2047 // Support fp16 0 immediate
2048 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
2049 }
2050
2051 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2052 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2053 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2054 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2055 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2056 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2057
2058 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2059 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2060 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2061 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2062 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2063
2064 if (Subtarget.hasBWI()) {
2065 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2066 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2067 }
2068
2069 if (Subtarget.hasFP16()) {
2070 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2071 setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
2072 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
2073 setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
2074 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
2075 setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
2076 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
2077 setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
2078 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
2079 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2080 setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
2081 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
2082 setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
2083 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
2084 setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
2085 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
2086 setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
2087 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
2088 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2089 setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
2090 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
2091 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
2092 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
2093 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2094 setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
2095 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
2096 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
2097 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
2098 }
2099
2100 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
2101 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
2102 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
2103 }
2104
2105 if (Subtarget.hasAMXTILE()) {
2106 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2107 }
2108
2109 // We want to custom lower some of our intrinsics.
2110 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
2111 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
2112 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
2113 if (!Subtarget.is64Bit()) {
2114 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
2115 }
2116
2117 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2118 // handle type legalization for these operations here.
2119 //
2120 // FIXME: We really should do custom legalization for addition and
2121 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2122 // than generic legalization for 64-bit multiplication-with-overflow, though.
2123 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2124 if (VT == MVT::i64 && !Subtarget.is64Bit())
2125 continue;
2126 // Add/Sub/Mul with overflow operations are custom lowered.
2127 setOperationAction(ISD::SADDO, VT, Custom);
2128 setOperationAction(ISD::UADDO, VT, Custom);
2129 setOperationAction(ISD::SSUBO, VT, Custom);
2130 setOperationAction(ISD::USUBO, VT, Custom);
2131 setOperationAction(ISD::SMULO, VT, Custom);
2132 setOperationAction(ISD::UMULO, VT, Custom);
2133
2134 // Support carry in as value rather than glue.
2135 setOperationAction(ISD::ADDCARRY, VT, Custom);
2136 setOperationAction(ISD::SUBCARRY, VT, Custom);
2137 setOperationAction(ISD::SETCCCARRY, VT, Custom);
2138 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
2139 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
2140 }
2141
2142 if (!Subtarget.is64Bit()) {
2143 // These libcalls are not available in 32-bit.
2144 setLibcallName(RTLIB::SHL_I128, nullptr);
2145 setLibcallName(RTLIB::SRL_I128, nullptr);
2146 setLibcallName(RTLIB::SRA_I128, nullptr);
2147 setLibcallName(RTLIB::MUL_I128, nullptr);
2148 // The MULO libcall is not part of libgcc, only compiler-rt.
2149 setLibcallName(RTLIB::MULO_I64, nullptr);
2150 }
2151 // The MULO libcall is not part of libgcc, only compiler-rt.
2152 setLibcallName(RTLIB::MULO_I128, nullptr);
2153
2154 // Combine sin / cos into _sincos_stret if it is available.
2155 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2156 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2157 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
2158 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
2159 }
2160
2161 if (Subtarget.isTargetWin64()) {
2162 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2163 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2164 setOperationAction(ISD::SREM, MVT::i128, Custom);
2165 setOperationAction(ISD::UREM, MVT::i128, Custom);
2166 }
2167
2168 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2169 // is. We should promote the value to 64-bits to solve this.
2170 // This is what the CRT headers do - `fmodf` is an inline header
2171 // function casting to f64 and calling `fmod`.
2172 if (Subtarget.is32Bit() &&
2173 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2174 for (ISD::NodeType Op :
2175 {ISD::FCEIL, ISD::STRICT_FCEIL,
2176 ISD::FCOS, ISD::STRICT_FCOS,
2177 ISD::FEXP, ISD::STRICT_FEXP,
2178 ISD::FFLOOR, ISD::STRICT_FFLOOR,
2179 ISD::FREM, ISD::STRICT_FREM,
2180 ISD::FLOG, ISD::STRICT_FLOG,
2181 ISD::FLOG10, ISD::STRICT_FLOG10,
2182 ISD::FPOW, ISD::STRICT_FPOW,
2183 ISD::FSIN, ISD::STRICT_FSIN})
2184 if (isOperationExpand(Op, MVT::f32))
2185 setOperationAction(Op, MVT::f32, Promote);
2186
2187 // We have target-specific dag combine patterns for the following nodes:
2188 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
2189 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
2190 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
2191 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
2192 setTargetDAGCombine(ISD::CONCAT_VECTORS);
2193 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
2194 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
2195 setTargetDAGCombine(ISD::BITCAST);
2196 setTargetDAGCombine(ISD::VSELECT);
2197 setTargetDAGCombine(ISD::SELECT);
2198 setTargetDAGCombine(ISD::SHL);
2199 setTargetDAGCombine(ISD::SRA);
2200 setTargetDAGCombine(ISD::SRL);
2201 setTargetDAGCombine(ISD::OR);
2202 setTargetDAGCombine(ISD::AND);
2203 setTargetDAGCombine(ISD::ADD);
2204 setTargetDAGCombine(ISD::FADD);
2205 setTargetDAGCombine(ISD::FSUB);
2206 setTargetDAGCombine(ISD::FNEG);
2207 setTargetDAGCombine(ISD::FMA);
2208 setTargetDAGCombine(ISD::STRICT_FMA);
2209 setTargetDAGCombine(ISD::FMINNUM);
2210 setTargetDAGCombine(ISD::FMAXNUM);
2211 setTargetDAGCombine(ISD::SUB);
2212 setTargetDAGCombine(ISD::LOAD);
2213 setTargetDAGCombine(ISD::MLOAD);
2214 setTargetDAGCombine(ISD::STORE);
2215 setTargetDAGCombine(ISD::MSTORE);
2216 setTargetDAGCombine(ISD::TRUNCATE);
2217 setTargetDAGCombine(ISD::ZERO_EXTEND);
2218 setTargetDAGCombine(ISD::ANY_EXTEND);
2219 setTargetDAGCombine(ISD::SIGN_EXTEND);
2220 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
2221 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
2222 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
2223 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
2224 setTargetDAGCombine(ISD::SINT_TO_FP);
2225 setTargetDAGCombine(ISD::UINT_TO_FP);
2226 setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
2227 setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
2228 setTargetDAGCombine(ISD::SETCC);
2229 setTargetDAGCombine(ISD::MUL);
2230 setTargetDAGCombine(ISD::XOR);
2231 setTargetDAGCombine(ISD::MSCATTER);
2232 setTargetDAGCombine(ISD::MGATHER);
2233 setTargetDAGCombine(ISD::FP16_TO_FP);
2234 setTargetDAGCombine(ISD::FP_EXTEND);
2235 setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
2236 setTargetDAGCombine(ISD::FP_ROUND);
2237
2238 computeRegisterProperties(Subtarget.getRegisterInfo());
2239
2240 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2241 MaxStoresPerMemsetOptSize = 8;
2242 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2243 MaxStoresPerMemcpyOptSize = 4;
2244 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2245 MaxStoresPerMemmoveOptSize = 4;
2246
2247 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2248 // that needs to be benchmarked and balanced with the potential use of vector
2249 // load/store types (PR33329, PR33914).
2250 MaxLoadsPerMemcmp = 2;
2251 MaxLoadsPerMemcmpOptSize = 2;
2252
2253 // Default loop alignment, which can be overridden by -align-loops.
2254 setPrefLoopAlignment(Align(16));
2255
2256 // An out-of-order CPU can speculatively execute past a predictable branch,
2257 // but a conditional move could be stalled by an expensive earlier operation.
2258 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2259 EnableExtLdPromotion = true;
2260 setPrefFunctionAlignment(Align(16));
2261
2262 verifyIntrinsicTables();
2263
2264 // Default to having -disable-strictnode-mutation on
2265 IsStrictFPEnabled = true;
2266}
2267
2268// This has so far only been implemented for 64-bit MachO.
2269bool X86TargetLowering::useLoadStackGuardNode() const {
2270 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2271}
2272
2273bool X86TargetLowering::useStackGuardXorFP() const {
2274 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2275 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2276}
2277
2278SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2279 const SDLoc &DL) const {
2280 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2281 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2282 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2283 return SDValue(Node, 0);
2284}
2285
2286TargetLoweringBase::LegalizeTypeAction
2287X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2288 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2289 !Subtarget.hasBWI())
2290 return TypeSplitVector;
2291
2292 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2293 VT.getVectorElementType() != MVT::i1)
2294 return TypeWidenVector;
2295
2296 return TargetLoweringBase::getPreferredVectorAction(VT);
2297}
2298
2299static std::pair<MVT, unsigned>
2300handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2301 const X86Subtarget &Subtarget) {
2302 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2303 // convention is one that uses k registers.
2304 if (NumElts == 2)
2305 return {MVT::v2i64, 1};
2306 if (NumElts == 4)
2307 return {MVT::v4i32, 1};
2308 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2309 CC != CallingConv::Intel_OCL_BI)
2310 return {MVT::v8i16, 1};
2311 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2312 CC != CallingConv::Intel_OCL_BI)
2313 return {MVT::v16i8, 1};
2314 // v32i1 passes in ymm unless we have BWI and the calling convention is
2315 // regcall.
2316 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2317 return {MVT::v32i8, 1};
2318 // Split v64i1 vectors if we don't have v64i8 available.
2319 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2320 if (Subtarget.useAVX512Regs())
2321 return {MVT::v64i8, 1};
2322 return {MVT::v32i8, 2};
2323 }
2324
2325 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2326 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2327 NumElts > 64)
2328 return {MVT::i8, NumElts};
2329
2330 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2331}
2332
2333MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2334 CallingConv::ID CC,
2335 EVT VT) const {
2336 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2337 Subtarget.hasAVX512()) {
2338 unsigned NumElts = VT.getVectorNumElements();
2339
2340 MVT RegisterVT;
2341 unsigned NumRegisters;
2342 std::tie(RegisterVT, NumRegisters) =
2343 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2344 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2345 return RegisterVT;
2346 }
2347
2348 // v3f16 will be widen to v4f16. But we don't assign register class for v4f16.
2349 // So its default register type is f16. We override the type to v8f16 here.
2350 if (VT == MVT::v3f16 && Subtarget.hasFP16())
2351 return MVT::v8f16;
2352
2353 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2354}
2355
2356unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2357 CallingConv::ID CC,
2358 EVT VT) const {
2359 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2360 Subtarget.hasAVX512()) {
2361 unsigned NumElts = VT.getVectorNumElements();
2362
2363 MVT RegisterVT;
2364 unsigned NumRegisters;
2365 std::tie(RegisterVT, NumRegisters) =
2366 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2367 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2368 return NumRegisters;
2369 }
2370
2371 // v3f16 will be widen to v4f16. But we don't assign register class for v4f16.
2372 // So its default register number is 3. We override the number to 1 here.
2373 if (VT == MVT::v3f16 && Subtarget.hasFP16())
2374 return 1;
2375
2376 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2377}
2378
2379unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2380 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2381 unsigned &NumIntermediates, MVT &RegisterVT) const {
2382 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2383 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2384 Subtarget.hasAVX512() &&
2385 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2386 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2387 VT.getVectorNumElements() > 64)) {
2388 RegisterVT = MVT::i8;
2389 IntermediateVT = MVT::i1;
2390 NumIntermediates = VT.getVectorNumElements();
2391 return NumIntermediates;
2392 }
2393
2394 // Split v64i1 vectors if we don't have v64i8 available.
2395 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2396 CC != CallingConv::X86_RegCall) {
2397 RegisterVT = MVT::v32i8;
2398 IntermediateVT = MVT::v32i1;
2399 NumIntermediates = 2;
2400 return 2;
2401 }
2402
2403 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2404 NumIntermediates, RegisterVT);
2405}
2406
2407EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2408 LLVMContext& Context,
2409 EVT VT) const {
2410 if (!VT.isVector())
2411 return MVT::i8;
2412
2413 if (Subtarget.hasAVX512()) {
2414 // Figure out what this type will be legalized to.
2415 EVT LegalVT = VT;
2416 while (getTypeAction(Context, LegalVT) != TypeLegal)
2417 LegalVT = getTypeToTransformTo(Context, LegalVT);
2418
2419 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2420 if (LegalVT.getSimpleVT().is512BitVector())
2421 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2422
2423 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2424 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2425 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2426 // vXi16/vXi8.
2427 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2428 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2429 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2430 }
2431 }
2432
2433 return VT.changeVectorElementTypeToInteger();
2434}
2435
2436/// Helper for getByValTypeAlignment to determine
2437/// the desired ByVal argument alignment.
2438static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2439 if (MaxAlign == 16)
2440 return;
2441 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2442 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2443 MaxAlign = Align(16);
2444 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2445 Align EltAlign;
2446 getMaxByValAlign(ATy->getElementType(), EltAlign);
2447 if (EltAlign > MaxAlign)
2448 MaxAlign = EltAlign;
2449 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2450 for (auto *EltTy : STy->elements()) {
2451 Align EltAlign;
2452 getMaxByValAlign(EltTy, EltAlign);
2453 if (EltAlign > MaxAlign)
2454 MaxAlign = EltAlign;
2455 if (MaxAlign == 16)
2456 break;
2457 }
2458 }
2459}
2460
2461/// Return the desired alignment for ByVal aggregate
2462/// function arguments in the caller parameter area. For X86, aggregates
2463/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2464/// are at 4-byte boundaries.
2465unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2466 const DataLayout &DL) const {
2467 if (Subtarget.is64Bit()) {
2468 // Max of 8 and alignment of type.
2469 Align TyAlign = DL.getABITypeAlign(Ty);
2470 if (TyAlign > 8)
2471 return TyAlign.value();
2472 return 8;
2473 }
2474
2475 Align Alignment(4);
2476 if (Subtarget.hasSSE1())
2477 getMaxByValAlign(Ty, Alignment);
2478 return Alignment.value();
2479}
2480
2481/// It returns EVT::Other if the type should be determined using generic
2482/// target-independent logic.
2483/// For vector ops we check that the overall size isn't larger than our
2484/// preferred vector width.
2485EVT X86TargetLowering::getOptimalMemOpType(
2486 const MemOp &Op, const AttributeList &FuncAttributes) const {
2487 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
2488 if (Op.size() >= 16 &&
2489 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2490 // FIXME: Check if unaligned 64-byte accesses are slow.
2491 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2492 (Subtarget.getPreferVectorWidth() >= 512)) {
2493 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2494 }
2495 // FIXME: Check if unaligned 32-byte accesses are slow.
2496 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2497 (Subtarget.getPreferVectorWidth() >= 256)) {
2498 // Although this isn't a well-supported type for AVX1, we'll let
2499 // legalization and shuffle lowering produce the optimal codegen. If we
2500 // choose an optimal type with a vector element larger than a byte,
2501 // getMemsetStores() may create an intermediate splat (using an integer
2502 // multiply) before we splat as a vector.
2503 return MVT::v32i8;
2504 }
2505 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2506 return MVT::v16i8;
2507 // TODO: Can SSE1 handle a byte vector?
2508 // If we have SSE1 registers we should be able to use them.
2509 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2510 (Subtarget.getPreferVectorWidth() >= 128))
2511 return MVT::v4f32;
2512 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2513 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2514 // Do not use f64 to lower memcpy if source is string constant. It's
2515 // better to use i32 to avoid the loads.
2516 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2517 // The gymnastics of splatting a byte value into an XMM register and then
2518 // only using 8-byte stores (because this is a CPU with slow unaligned
2519 // 16-byte accesses) makes that a loser.
2520 return MVT::f64;
2521 }
2522 }
2523 // This is a compromise. If we reach here, unaligned accesses may be slow on
2524 // this target. However, creating smaller, aligned accesses could be even
2525 // slower and would certainly be a lot more code.
2526 if (Subtarget.is64Bit() && Op.size() >= 8)
2527 return MVT::i64;
2528 return MVT::i32;
2529}
2530
2531bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2532 if (VT == MVT::f32)
2533 return X86ScalarSSEf32;
2534 if (VT == MVT::f64)
2535 return X86ScalarSSEf64;
2536 return true;
2537}
2538
2539bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2540 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
2541 bool *Fast) const {
2542 if (Fast) {
2543 switch (VT.getSizeInBits()) {
2544 default:
2545 // 8-byte and under are always assumed to be fast.
2546 *Fast = true;
2547 break;
2548 case 128:
2549 *Fast = !Subtarget.isUnalignedMem16Slow();
2550 break;
2551 case 256:
2552 *Fast = !Subtarget.isUnalignedMem32Slow();
2553 break;
2554 // TODO: What about AVX-512 (512-bit) accesses?
2555 }
2556 }
2557 // NonTemporal vector memory ops must be aligned.
2558 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2559 // NT loads can only be vector aligned, so if its less aligned than the
2560 // minimum vector size (which we can split the vector down to), we might as
2561 // well use a regular unaligned vector load.
2562 // We don't have any NT loads pre-SSE41.
2563 if (!!(Flags & MachineMemOperand::MOLoad))
2564 return (Alignment < 16 || !Subtarget.hasSSE41());
2565 return false;
2566 }
2567 // Misaligned accesses of any size are always allowed.
2568 return true;
2569}
2570
2571/// Return the entry encoding for a jump table in the
2572/// current function. The returned value is a member of the
2573/// MachineJumpTableInfo::JTEntryKind enum.
2574unsigned X86TargetLowering::getJumpTableEncoding() const {
2575 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2576 // symbol.
2577 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2578 return MachineJumpTableInfo::EK_Custom32;
2579
2580 // Otherwise, use the normal jump table encoding heuristics.
2581 return TargetLowering::getJumpTableEncoding();
2582}
2583
2584bool X86TargetLowering::useSoftFloat() const {
2585 return Subtarget.useSoftFloat();
2586}
2587
2588void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2589 ArgListTy &Args) const {
2590
2591 // Only relabel X86-32 for C / Stdcall CCs.
2592 if (Subtarget.is64Bit())
2593 return;
2594 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2595 return;
2596 unsigned ParamRegs = 0;
2597 if (auto *M = MF->getFunction().getParent())
2598 ParamRegs = M->getNumberRegisterParameters();
2599
2600 // Mark the first N int arguments as having reg
2601 for (auto &Arg : Args) {
2602 Type *T = Arg.Ty;
2603 if (T->isIntOrPtrTy())
2604 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2605 unsigned numRegs = 1;
2606 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2607 numRegs = 2;
2608 if (ParamRegs < numRegs)
2609 return;
2610 ParamRegs -= numRegs;
2611 Arg.IsInReg = true;
2612 }
2613 }
2614}
2615
2616const MCExpr *
2617X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2618 const MachineBasicBlock *MBB,
2619 unsigned uid,MCContext &Ctx) const{
2620 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())(static_cast <bool> (isPositionIndependent() &&
Subtarget.isPICStyleGOT()) ? void (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2620, __extension__ __PRETTY_FUNCTION__))
;
2621 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2622 // entries.
2623 return MCSymbolRefExpr::create(MBB->getSymbol(),
2624 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2625}
2626
2627/// Returns relocation base for the given PIC jumptable.
2628SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2629 SelectionDAG &DAG) const {
2630 if (!Subtarget.is64Bit())
2631 // This doesn't have SDLoc associated with it, but is not really the
2632 // same as a Register.
2633 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2634 getPointerTy(DAG.getDataLayout()));
2635 return Table;
2636}
2637
2638/// This returns the relocation base for the given PIC jumptable,
2639/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2640const MCExpr *X86TargetLowering::
2641getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2642 MCContext &Ctx) const {
2643 // X86-64 uses RIP relative addressing based on the jump table label.
2644 if (Subtarget.isPICStyleRIPRel())
2645 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2646
2647 // Otherwise, the reference is relative to the PIC base.
2648 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2649}
2650
2651std::pair<const TargetRegisterClass *, uint8_t>
2652X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2653 MVT VT) const {
2654 const TargetRegisterClass *RRC = nullptr;
2655 uint8_t Cost = 1;
2656 switch (VT.SimpleTy) {
2657 default:
2658 return TargetLowering::findRepresentativeClass(TRI, VT);
2659 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2660 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2661 break;
2662 case MVT::x86mmx:
2663 RRC = &X86::VR64RegClass;
2664 break;
2665 case MVT::f32: case MVT::f64:
2666 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2667 case MVT::v4f32: case MVT::v2f64:
2668 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2669 case MVT::v8f32: case MVT::v4f64:
2670 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2671 case MVT::v16f32: case MVT::v8f64:
2672 RRC = &X86::VR128XRegClass;
2673 break;
2674 }
2675 return std::make_pair(RRC, Cost);
2676}
2677
2678unsigned X86TargetLowering::getAddressSpace() const {
2679 if (Subtarget.is64Bit())
2680 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2681 return 256;
2682}
2683
2684static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2685 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2686 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2687}
2688
2689static Constant* SegmentOffset(IRBuilderBase &IRB,
2690 int Offset, unsigned AddressSpace) {
2691 return ConstantExpr::getIntToPtr(
2692 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2693 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2694}
2695
2696Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
2697 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2698 // tcbhead_t; use it instead of the usual global variable (see
2699 // sysdeps/{i386,x86_64}/nptl/tls.h)
2700 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2701 if (Subtarget.isTargetFuchsia()) {
2702 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2703 return SegmentOffset(IRB, 0x10, getAddressSpace());
2704 } else {
2705 unsigned AddressSpace = getAddressSpace();
2706 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
2707 // Specially, some users may customize the base reg and offset.
2708 int Offset = M->getStackProtectorGuardOffset();
2709 // If we don't set -stack-protector-guard-offset value:
2710 // %fs:0x28, unless we're using a Kernel code model, in which case
2711 // it's %gs:0x28. gs:0x14 on i386.
2712 if (Offset == INT_MAX2147483647)
2713 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2714
2715 StringRef GuardReg = M->getStackProtectorGuardReg();
2716 if (GuardReg == "fs")
2717 AddressSpace = X86AS::FS;
2718 else if (GuardReg == "gs")
2719 AddressSpace = X86AS::GS;
2720 return SegmentOffset(IRB, Offset, AddressSpace);
2721 }
2722 }
2723 return TargetLowering::getIRStackGuard(IRB);
2724}
2725
2726void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2727 // MSVC CRT provides functionalities for stack protection.
2728 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2729 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2730 // MSVC CRT has a global variable holding security cookie.
2731 M.getOrInsertGlobal("__security_cookie",
2732 Type::getInt8PtrTy(M.getContext()));
2733
2734 // MSVC CRT has a function to validate security cookie.
2735 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2736 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2737 Type::getInt8PtrTy(M.getContext()));
2738 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2739 F->setCallingConv(CallingConv::X86_FastCall);
2740 F->addParamAttr(0, Attribute::AttrKind::InReg);
2741 }
2742 return;
2743 }
2744
2745 StringRef GuardMode = M.getStackProtectorGuard();
2746
2747 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2748 if ((GuardMode == "tls" || GuardMode.empty()) &&
2749 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2750 return;
2751 TargetLowering::insertSSPDeclarations(M);
2752}
2753
2754Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2755 // MSVC CRT has a global variable holding security cookie.
2756 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2757 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2758 return M.getGlobalVariable("__security_cookie");
2759 }
2760 return TargetLowering::getSDagStackGuard(M);
2761}
2762
2763Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2764 // MSVC CRT has a function to validate security cookie.
2765 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2766 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2767 return M.getFunction("__security_check_cookie");
2768 }
2769 return TargetLowering::getSSPStackGuardCheck(M);
2770}
2771
2772Value *
2773X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
2774 if (Subtarget.getTargetTriple().isOSContiki())
2775 return getDefaultSafeStackPointerLocation(IRB, false);
2776
2777 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2778 // definition of TLS_SLOT_SAFESTACK in
2779 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2780 if (Subtarget.isTargetAndroid()) {
2781 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2782 // %gs:0x24 on i386
2783 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2784 return SegmentOffset(IRB, Offset, getAddressSpace());
2785 }
2786
2787 // Fuchsia is similar.
2788 if (Subtarget.isTargetFuchsia()) {
2789 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2790 return SegmentOffset(IRB, 0x18, getAddressSpace());
2791 }
2792
2793 return TargetLowering::getSafeStackPointerLocation(IRB);
2794}
2795
2796//===----------------------------------------------------------------------===//
2797// Return Value Calling Convention Implementation
2798//===----------------------------------------------------------------------===//
2799
2800bool X86TargetLowering::CanLowerReturn(
2801 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2802 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2803 SmallVector<CCValAssign, 16> RVLocs;
2804 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2805 return CCInfo.CheckReturn(Outs, RetCC_X86);
2806}
2807
2808const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2809 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2810 return ScratchRegs;
2811}
2812
2813/// Lowers masks values (v*i1) to the local register values
2814/// \returns DAG node after lowering to register type
2815static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2816 const SDLoc &Dl, SelectionDAG &DAG) {
2817 EVT ValVT = ValArg.getValueType();
2818
2819 if (ValVT == MVT::v1i1)
2820 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2821 DAG.getIntPtrConstant(0, Dl));
2822
2823 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2824 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2825 // Two stage lowering might be required
2826 // bitcast: v8i1 -> i8 / v16i1 -> i16
2827 // anyextend: i8 -> i32 / i16 -> i32
2828 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2829 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2830 if (ValLoc == MVT::i32)
2831 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2832 return ValToCopy;
2833 }
2834
2835 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2836 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2837 // One stage lowering is required
2838 // bitcast: v32i1 -> i32 / v64i1 -> i64
2839 return DAG.getBitcast(ValLoc, ValArg);
2840 }
2841
2842 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2843}
2844
2845/// Breaks v64i1 value into two registers and adds the new node to the DAG
2846static void Passv64i1ArgInRegs(
2847 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2848 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
2849 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2850 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")(static_cast <bool> (Subtarget.hasBWI() && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2850, __extension__ __PRETTY_FUNCTION__))
;
2851 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2851, __extension__ __PRETTY_FUNCTION__))
;
2852 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")(static_cast <bool> (Arg.getValueType() == MVT::i64 &&
"Expecting 64 bit value") ? void (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2852, __extension__ __PRETTY_FUNCTION__))
;
2853 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2854, __extension__ __PRETTY_FUNCTION__))
2854 "The value should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2854, __extension__ __PRETTY_FUNCTION__))
;
2855
2856 // Before splitting the value we cast it to i64
2857 Arg = DAG.getBitcast(MVT::i64, Arg);
2858
2859 // Splitting the value into two i32 types
2860 SDValue Lo, Hi;
2861 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2862 DAG.getConstant(0, Dl, MVT::i32));
2863 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2864 DAG.getConstant(1, Dl, MVT::i32));
2865
2866 // Attach the two i32 types into corresponding registers
2867 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2868 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2869}
2870
2871SDValue
2872X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2873 bool isVarArg,
2874 const SmallVectorImpl<ISD::OutputArg> &Outs,
2875 const SmallVectorImpl<SDValue> &OutVals,
2876 const SDLoc &dl, SelectionDAG &DAG) const {
2877 MachineFunction &MF = DAG.getMachineFunction();
2878 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2879
2880 // In some cases we need to disable registers from the default CSR list.
2881 // For example, when they are used for argument passing.
2882 bool ShouldDisableCalleeSavedRegister =
2883 CallConv == CallingConv::X86_RegCall ||
2884 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2885
2886 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2887 report_fatal_error("X86 interrupts may not return any value");
2888
2889 SmallVector<CCValAssign, 16> RVLocs;
2890 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2891 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2892
2893 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
2894 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2895 ++I, ++OutsIndex) {
2896 CCValAssign &VA = RVLocs[I];
2897 assert(VA.isRegLoc() && "Can only return in registers!")(static_cast <bool> (VA.isRegLoc() && "Can only return in registers!"
) ? void (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2897, __extension__ __PRETTY_FUNCTION__))
;
2898
2899 // Add the register to the CalleeSaveDisableRegs list.
2900 if (ShouldDisableCalleeSavedRegister)
2901 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2902
2903 SDValue ValToCopy = OutVals[OutsIndex];
2904 EVT ValVT = ValToCopy.getValueType();
2905
2906 // Promote values to the appropriate types.
2907 if (VA.getLocInfo() == CCValAssign::SExt)
2908 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2909 else if (VA.getLocInfo() == CCValAssign::ZExt)
2910 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2911 else if (VA.getLocInfo() == CCValAssign::AExt) {
2912 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2913 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2914 else
2915 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2916 }
2917 else if (VA.getLocInfo() == CCValAssign::BCvt)
2918 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2919
2920 assert(VA.getLocInfo() != CCValAssign::FPExt &&(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2921, __extension__ __PRETTY_FUNCTION__))
2921 "Unexpected FP-extend for return value.")(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2921, __extension__ __PRETTY_FUNCTION__))
;
2922
2923 // Report an error if we have attempted to return a value via an XMM
2924 // register and SSE was disabled.
2925 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
2926 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2927 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2928 } else if (!Subtarget.hasSSE2() &&
2929 X86::FR64XRegClass.contains(VA.getLocReg()) &&
2930 ValVT == MVT::f64) {
2931 // When returning a double via an XMM register, report an error if SSE2 is
2932 // not enabled.
2933 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2934 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2935 }
2936
2937 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2938 // the RET instruction and handled by the FP Stackifier.
2939 if (VA.getLocReg() == X86::FP0 ||
2940 VA.getLocReg() == X86::FP1) {
2941 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2942 // change the value to the FP stack register class.
2943 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2944 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2945 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2946 // Don't emit a copytoreg.
2947 continue;
2948 }
2949
2950 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2951 // which is returned in RAX / RDX.
2952 if (Subtarget.is64Bit()) {
2953 if (ValVT == MVT::x86mmx) {
2954 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2955 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2956 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2957 ValToCopy);
2958 // If we don't have SSE2 available, convert to v4f32 so the generated
2959 // register is legal.
2960 if (!Subtarget.hasSSE2())
2961 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2962 }
2963 }
2964 }
2965
2966 if (VA.needsCustom()) {
2967 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2968, __extension__ __PRETTY_FUNCTION__))
2968 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2968, __extension__ __PRETTY_FUNCTION__))
;
2969
2970 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
2971 Subtarget);
2972
2973 // Add the second register to the CalleeSaveDisableRegs list.
2974 if (ShouldDisableCalleeSavedRegister)
2975 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2976 } else {
2977 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2978 }
2979 }
2980
2981 SDValue Flag;
2982 SmallVector<SDValue, 6> RetOps;
2983 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2984 // Operand #1 = Bytes To Pop
2985 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2986 MVT::i32));
2987
2988 // Copy the result values into the output registers.
2989 for (auto &RetVal : RetVals) {
2990 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
2991 RetOps.push_back(RetVal.second);
2992 continue; // Don't emit a copytoreg.
2993 }
2994
2995 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
2996 Flag = Chain.getValue(1);
2997 RetOps.push_back(
2998 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
2999 }
3000
3001 // Swift calling convention does not require we copy the sret argument
3002 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
3003
3004 // All x86 ABIs require that for returning structs by value we copy
3005 // the sret argument into %rax/%eax (depending on ABI) for the return.
3006 // We saved the argument into a virtual register in the entry block,
3007 // so now we copy the value out and into %rax/%eax.
3008 //
3009 // Checking Function.hasStructRetAttr() here is insufficient because the IR
3010 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
3011 // false, then an sret argument may be implicitly inserted in the SelDAG. In
3012 // either case FuncInfo->setSRetReturnReg() will have been called.
3013 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3014 // When we have both sret and another return value, we should use the
3015 // original Chain stored in RetOps[0], instead of the current Chain updated
3016 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
3017
3018 // For the case of sret and another return value, we have
3019 // Chain_0 at the function entry
3020 // Chain_1 = getCopyToReg(Chain_0) in the above loop
3021 // If we use Chain_1 in getCopyFromReg, we will have
3022 // Val = getCopyFromReg(Chain_1)
3023 // Chain_2 = getCopyToReg(Chain_1, Val) from below
3024
3025 // getCopyToReg(Chain_0) will be glued together with
3026 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
3027 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
3028 // Data dependency from Unit B to Unit A due to usage of Val in
3029 // getCopyToReg(Chain_1, Val)
3030 // Chain dependency from Unit A to Unit B
3031
3032 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
3033 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
3034 getPointerTy(MF.getDataLayout()));
3035
3036 Register RetValReg
3037 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
3038 X86::RAX : X86::EAX;
3039 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
3040 Flag = Chain.getValue(1);
3041
3042 // RAX/EAX now acts like a return value.
3043 RetOps.push_back(
3044 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
3045
3046 // Add the returned register to the CalleeSaveDisableRegs list.
3047 if (ShouldDisableCalleeSavedRegister)
3048 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
3049 }
3050
3051 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3052 const MCPhysReg *I =
3053 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3054 if (I) {
3055 for (; *I; ++I) {
3056 if (X86::GR64RegClass.contains(*I))
3057 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3058 else
3059 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3059)
;
3060 }
3061 }
3062
3063 RetOps[0] = Chain; // Update chain.
3064
3065 // Add the flag if we have it.
3066 if (Flag.getNode())
3067 RetOps.push_back(Flag);
3068
3069 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
3070 if (CallConv == CallingConv::X86_INTR)
3071 opcode = X86ISD::IRET;
3072 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
3073}
3074
3075bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3076 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
3077 return false;
3078
3079 SDValue TCChain = Chain;
3080 SDNode *Copy = *N->use_begin();
3081 if (Copy->getOpcode() == ISD::CopyToReg) {
3082 // If the copy has a glue operand, we conservatively assume it isn't safe to
3083 // perform a tail call.
3084 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3085 return false;
3086 TCChain = Copy->getOperand(0);
3087 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
3088 return false;
3089
3090 bool HasRet = false;
3091 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
3092 UI != UE; ++UI) {
3093 if (UI->getOpcode() != X86ISD::RET_FLAG)
3094 return false;
3095 // If we are returning more than one value, we can definitely
3096 // not make a tail call see PR19530
3097 if (UI->getNumOperands() > 4)
3098 return false;
3099 if (UI->getNumOperands() == 4 &&
3100 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
3101 return false;
3102 HasRet = true;
3103 }
3104
3105 if (!HasRet)
3106 return false;
3107
3108 Chain = TCChain;
3109 return true;
3110}
3111
3112EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
3113 ISD::NodeType ExtendKind) const {
3114 MVT ReturnMVT = MVT::i32;
3115
3116 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
3117 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
3118 // The ABI does not require i1, i8 or i16 to be extended.
3119 //
3120 // On Darwin, there is code in the wild relying on Clang's old behaviour of
3121 // always extending i8/i16 return values, so keep doing that for now.
3122 // (PR26665).
3123 ReturnMVT = MVT::i8;
3124 }
3125
3126 EVT MinVT = getRegisterType(Context, ReturnMVT);
3127 return VT.bitsLT(MinVT) ? MinVT : VT;
3128}
3129
3130/// Reads two 32 bit registers and creates a 64 bit mask value.
3131/// \param VA The current 32 bit value that need to be assigned.
3132/// \param NextVA The next 32 bit value that need to be assigned.
3133/// \param Root The parent DAG node.
3134/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
3135/// glue purposes. In the case the DAG is already using
3136/// physical register instead of virtual, we should glue
3137/// our new SDValue to InFlag SDvalue.
3138/// \return a new SDvalue of size 64bit.
3139static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
3140 SDValue &Root, SelectionDAG &DAG,
3141 const SDLoc &Dl, const X86Subtarget &Subtarget,
3142 SDValue *InFlag = nullptr) {
3143 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(static_cast <bool> ((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3143, __extension__ __PRETTY_FUNCTION__))
;
3144 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3144, __extension__ __PRETTY_FUNCTION__))
;
3145 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3146, __extension__ __PRETTY_FUNCTION__))
3146 "Expecting first location of 64 bit width type")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3146, __extension__ __PRETTY_FUNCTION__))
;
3147 assert(NextVA.getValVT() == VA.getValVT() &&(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3148, __extension__ __PRETTY_FUNCTION__))
3148 "The locations should have the same type")(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3148, __extension__ __PRETTY_FUNCTION__))
;
3149 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3150, __extension__ __PRETTY_FUNCTION__))
3150 "The values should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3150, __extension__ __PRETTY_FUNCTION__))
;
3151
3152 SDValue Lo, Hi;
3153 SDValue ArgValueLo, ArgValueHi;
3154
3155 MachineFunction &MF = DAG.getMachineFunction();
3156 const TargetRegisterClass *RC = &X86::GR32RegClass;
3157
3158 // Read a 32 bit value from the registers.
3159 if (nullptr == InFlag) {
3160 // When no physical register is present,
3161 // create an intermediate virtual register.
3162 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3163 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3164 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3165 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3166 } else {
3167 // When a physical register is available read the value from it and glue
3168 // the reads together.
3169 ArgValueLo =
3170 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
3171 *InFlag = ArgValueLo.getValue(2);
3172 ArgValueHi =
3173 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
3174 *InFlag = ArgValueHi.getValue(2);
3175 }
3176
3177 // Convert the i32 type into v32i1 type.
3178 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
3179
3180 // Convert the i32 type into v32i1 type.
3181 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
3182
3183 // Concatenate the two values together.
3184 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
3185}
3186
3187/// The function will lower a register of various sizes (8/16/32/64)
3188/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
3189/// \returns a DAG node contains the operand after lowering to mask type.
3190static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
3191 const EVT &ValLoc, const SDLoc &Dl,
3192 SelectionDAG &DAG) {
3193 SDValue ValReturned = ValArg;
3194
3195 if (ValVT == MVT::v1i1)
3196 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3197
3198 if (ValVT == MVT::v64i1) {
3199 // In 32 bit machine, this case is handled by getv64i1Argument
3200 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")(static_cast <bool> (ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? void (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3200, __extension__ __PRETTY_FUNCTION__))
;
3201 // In 64 bit machine, There is no need to truncate the value only bitcast
3202 } else {
3203 MVT maskLen;
3204 switch (ValVT.getSimpleVT().SimpleTy) {
3205 case MVT::v8i1:
3206 maskLen = MVT::i8;
3207 break;
3208 case MVT::v16i1:
3209 maskLen = MVT::i16;
3210 break;
3211 case MVT::v32i1:
3212 maskLen = MVT::i32;
3213 break;
3214 default:
3215 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3215)
;
3216 }
3217
3218 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3219 }
3220 return DAG.getBitcast(ValVT, ValReturned);
3221}
3222
3223/// Lower the result values of a call into the
3224/// appropriate copies out of appropriate physical registers.
3225///
3226SDValue X86TargetLowering::LowerCallResult(
3227 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3228 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3229 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3230 uint32_t *RegMask) const {
3231
3232 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3233 // Assign locations to each value returned by this call.
3234 SmallVector<CCValAssign, 16> RVLocs;
3235 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3236 *DAG.getContext());
3237 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3238
3239 // Copy all of the result registers out of their specified physreg.
3240 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3241 ++I, ++InsIndex) {
3242 CCValAssign &VA = RVLocs[I];
3243 EVT CopyVT = VA.getLocVT();
3244
3245 // In some calling conventions we need to remove the used registers
3246 // from the register mask.
3247 if (RegMask) {
3248 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3249 SubRegs.isValid(); ++SubRegs)
3250 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3251 }
3252
3253 // Report an error if there was an attempt to return FP values via XMM
3254 // registers.
3255 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3256 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3257 if (VA.getLocReg() == X86::XMM1)
3258 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3259 else
3260 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3261 } else if (!Subtarget.hasSSE2() &&
3262 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3263 CopyVT == MVT::f64) {
3264 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3265 if (VA.getLocReg() == X86::XMM1)
3266 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3267 else
3268 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3269 }
3270
3271 // If we prefer to use the value in xmm registers, copy it out as f80 and
3272 // use a truncate to move it from fp stack reg to xmm reg.
3273 bool RoundAfterCopy = false;
3274 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3275 isScalarFPTypeInSSEReg(VA.getValVT())) {
3276 if (!Subtarget.hasX87())
3277 report_fatal_error("X87 register return with X87 disabled");
3278 CopyVT = MVT::f80;
3279 RoundAfterCopy = (CopyVT != VA.getLocVT());
3280 }
3281
3282 SDValue Val;
3283 if (VA.needsCustom()) {
3284 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3285, __extension__ __PRETTY_FUNCTION__))
3285 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3285, __extension__ __PRETTY_FUNCTION__))
;
3286 Val =
3287 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3288 } else {
3289 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3290 .getValue(1);
3291 Val = Chain.getValue(0);
3292 InFlag = Chain.getValue(2);
3293 }
3294
3295 if (RoundAfterCopy)
3296 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3297 // This truncation won't change the value.
3298 DAG.getIntPtrConstant(1, dl));
3299
3300 if (VA.isExtInLoc()) {
3301 if (VA.getValVT().isVector() &&
3302 VA.getValVT().getScalarType() == MVT::i1 &&
3303 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3304 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3305 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3306 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3307 } else
3308 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3309 }
3310
3311 if (VA.getLocInfo() == CCValAssign::BCvt)
3312 Val = DAG.getBitcast(VA.getValVT(), Val);
3313
3314 InVals.push_back(Val);
3315 }
3316
3317 return Chain;
3318}
3319
3320//===----------------------------------------------------------------------===//
3321// C & StdCall & Fast Calling Convention implementation
3322//===----------------------------------------------------------------------===//
3323// StdCall calling convention seems to be standard for many Windows' API
3324// routines and around. It differs from C calling convention just a little:
3325// callee should clean up the stack, not caller. Symbols should be also
3326// decorated in some fancy way :) It doesn't support any vector arguments.
3327// For info on fast calling convention see Fast Calling Convention (tail call)
3328// implementation LowerX86_32FastCCCallTo.
3329
3330/// CallIsStructReturn - Determines whether a call uses struct return
3331/// semantics.
3332enum StructReturnType {
3333 NotStructReturn,
3334 RegStructReturn,
3335 StackStructReturn
3336};
3337static StructReturnType
3338callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
3339 if (Outs.empty())
3340 return NotStructReturn;
3341
3342 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
3343 if (!Flags.isSRet())
3344 return NotStructReturn;
3345 if (Flags.isInReg() || IsMCU)
3346 return RegStructReturn;
3347 return StackStructReturn;
3348}
3349
3350/// Determines whether a function uses struct return semantics.
3351static StructReturnType
3352argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
3353 if (Ins.empty())
3354 return NotStructReturn;
3355
3356 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
3357 if (!Flags.isSRet())
3358 return NotStructReturn;
3359 if (Flags.isInReg() || IsMCU)
3360 return RegStructReturn;
3361 return StackStructReturn;
3362}
3363
3364/// Make a copy of an aggregate at address specified by "Src" to address
3365/// "Dst" with size and alignment information specified by the specific
3366/// parameter attribute. The copy will be passed as a byval function parameter.
3367static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3368 SDValue Chain, ISD::ArgFlagsTy Flags,
3369 SelectionDAG &DAG, const SDLoc &dl) {
3370 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3371
3372 return DAG.getMemcpy(
3373 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3374 /*isVolatile*/ false, /*AlwaysInline=*/true,
3375 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3376}
3377
3378/// Return true if the calling convention is one that we can guarantee TCO for.
3379static bool canGuaranteeTCO(CallingConv::ID CC) {
3380 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3381 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3382 CC == CallingConv::HHVM || CC == CallingConv::Tail ||
3383 CC == CallingConv::SwiftTail);
3384}
3385
3386/// Return true if we might ever do TCO for calls with this calling convention.
3387static bool mayTailCallThisCC(CallingConv::ID CC) {
3388 switch (CC) {
3389 // C calling conventions:
3390 case CallingConv::C:
3391 case CallingConv::Win64:
3392 case CallingConv::X86_64_SysV:
3393 // Callee pop conventions:
3394 case CallingConv::X86_ThisCall:
3395 case CallingConv::X86_StdCall:
3396 case CallingConv::X86_VectorCall:
3397 case CallingConv::X86_FastCall:
3398 // Swift:
3399 case CallingConv::Swift:
3400 return true;
3401 default:
3402 return canGuaranteeTCO(CC);
3403 }
3404}
3405
3406/// Return true if the function is being made into a tailcall target by
3407/// changing its ABI.
3408static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3409 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
3410 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
3411}
3412
3413bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3414 if (!CI->isTailCall())
3415 return false;
3416
3417 CallingConv::ID CalleeCC = CI->getCallingConv();
3418 if (!mayTailCallThisCC(CalleeCC))
3419 return false;
3420
3421 return true;
3422}
3423
3424SDValue
3425X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3426 const SmallVectorImpl<ISD::InputArg> &Ins,
3427 const SDLoc &dl, SelectionDAG &DAG,
3428 const CCValAssign &VA,
3429 MachineFrameInfo &MFI, unsigned i) const {
3430 // Create the nodes corresponding to a load from this parameter slot.
3431 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3432 bool AlwaysUseMutable = shouldGuaranteeTCO(
3433 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3434 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3435 EVT ValVT;
3436 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3437
3438 // If value is passed by pointer we have address passed instead of the value
3439 // itself. No need to extend if the mask value and location share the same
3440 // absolute size.
3441 bool ExtendedInMem =
3442 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3443 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3444
3445 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3446 ValVT = VA.getLocVT();
3447 else
3448 ValVT = VA.getValVT();
3449
3450 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3451 // changed with more analysis.
3452 // In case of tail call optimization mark all arguments mutable. Since they
3453 // could be overwritten by lowering of arguments in case of a tail call.
3454 if (Flags.isByVal()) {
3455 unsigned Bytes = Flags.getByValSize();
3456 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3457
3458 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3459 // can be improved with deeper analysis.
3460 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3461 /*isAliased=*/true);
3462 return DAG.getFrameIndex(FI, PtrVT);
3463 }
3464
3465 EVT ArgVT = Ins[i].ArgVT;
3466
3467 // If this is a vector that has been split into multiple parts, and the
3468 // scalar size of the parts don't match the vector element size, then we can't
3469 // elide the copy. The parts will have padding between them instead of being
3470 // packed like a vector.
3471 bool ScalarizedAndExtendedVector =
3472 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3473 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3474
3475 // This is an argument in memory. We might be able to perform copy elision.
3476 // If the argument is passed directly in memory without any extension, then we
3477 // can perform copy elision. Large vector types, for example, may be passed
3478 // indirectly by pointer.
3479 if (Flags.isCopyElisionCandidate() &&
3480 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3481 !ScalarizedAndExtendedVector) {
3482 SDValue PartAddr;
3483 if (Ins[i].PartOffset == 0) {
3484 // If this is a one-part value or the first part of a multi-part value,
3485 // create a stack object for the entire argument value type and return a
3486 // load from our portion of it. This assumes that if the first part of an
3487 // argument is in memory, the rest will also be in memory.
3488 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3489 /*IsImmutable=*/false);
3490 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3491 return DAG.getLoad(
3492 ValVT, dl, Chain, PartAddr,
3493 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3494 } else {
3495 // This is not the first piece of an argument in memory. See if there is
3496 // already a fixed stack object including this offset. If so, assume it
3497 // was created by the PartOffset == 0 branch above and create a load from
3498 // the appropriate offset into it.
3499 int64_t PartBegin = VA.getLocMemOffset();
3500 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3501 int FI = MFI.getObjectIndexBegin();
3502 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3503 int64_t ObjBegin = MFI.getObjectOffset(FI);
3504 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3505 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3506 break;
3507 }
3508 if (MFI.isFixedObjectIndex(FI)) {
3509 SDValue Addr =
3510 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3511 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3512 return DAG.getLoad(
3513 ValVT, dl, Chain, Addr,
3514 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3515 Ins[i].PartOffset));
3516 }
3517 }
3518 }
3519
3520 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3521 VA.getLocMemOffset(), isImmutable);
3522
3523 // Set SExt or ZExt flag.
3524 if (VA.getLocInfo() == CCValAssign::ZExt) {
3525 MFI.setObjectZExt(FI, true);
3526 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3527 MFI.setObjectSExt(FI, true);
3528 }
3529
3530 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3531 SDValue Val = DAG.getLoad(
3532 ValVT, dl, Chain, FIN,
3533 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3534 return ExtendedInMem
3535 ? (VA.getValVT().isVector()
3536 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3537 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3538 : Val;
3539}
3540
3541// FIXME: Get this from tablegen.
3542static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3543 const X86Subtarget &Subtarget) {
3544 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3544, __extension__ __PRETTY_FUNCTION__))
;
3545
3546 if (Subtarget.isCallingConvWin64(CallConv)) {
3547 static const MCPhysReg GPR64ArgRegsWin64[] = {
3548 X86::RCX, X86::RDX, X86::R8, X86::R9
3549 };
3550 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3551 }
3552
3553 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3554 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3555 };
3556 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3557}
3558
3559// FIXME: Get this from tablegen.
3560static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3561 CallingConv::ID CallConv,
3562 const X86Subtarget &Subtarget) {
3563 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3563, __extension__ __PRETTY_FUNCTION__))
;
3564 if (Subtarget.isCallingConvWin64(CallConv)) {
3565 // The XMM registers which might contain var arg parameters are shadowed
3566 // in their paired GPR. So we only need to save the GPR to their home
3567 // slots.
3568 // TODO: __vectorcall will change this.
3569 return None;
3570 }
3571
3572 bool isSoftFloat = Subtarget.useSoftFloat();
3573 if (isSoftFloat || !Subtarget.hasSSE1())
3574 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3575 // registers.
3576 return None;
3577
3578 static const MCPhysReg XMMArgRegs64Bit[] = {
3579 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3580 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3581 };
3582 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3583}
3584
3585#ifndef NDEBUG
3586static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3587 return llvm::is_sorted(
3588 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3589 return A.getValNo() < B.getValNo();
3590 });
3591}
3592#endif
3593
3594namespace {
3595/// This is a helper class for lowering variable arguments parameters.
3596class VarArgsLoweringHelper {
3597public:
3598 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3599 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3600 CallingConv::ID CallConv, CCState &CCInfo)
3601 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3602 TheMachineFunction(DAG.getMachineFunction()),
3603 TheFunction(TheMachineFunction.getFunction()),
3604 FrameInfo(TheMachineFunction.getFrameInfo()),
3605 FrameLowering(*Subtarget.getFrameLowering()),
3606 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3607 CCInfo(CCInfo) {}
3608
3609 // Lower variable arguments parameters.
3610 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3611
3612private:
3613 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3614
3615 void forwardMustTailParameters(SDValue &Chain);
3616
3617 bool is64Bit() const { return Subtarget.is64Bit(); }
3618 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3619
3620 X86MachineFunctionInfo *FuncInfo;
3621 const SDLoc &DL;
3622 SelectionDAG &DAG;
3623 const X86Subtarget &Subtarget;
3624 MachineFunction &TheMachineFunction;
3625 const Function &TheFunction;
3626 MachineFrameInfo &FrameInfo;
3627 const TargetFrameLowering &FrameLowering;
3628 const TargetLowering &TargLowering;
3629 CallingConv::ID CallConv;
3630 CCState &CCInfo;
3631};
3632} // namespace
3633
3634void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3635 SDValue &Chain, unsigned StackSize) {
3636 // If the function takes variable number of arguments, make a frame index for
3637 // the start of the first vararg value... for expansion of llvm.va_start. We
3638 // can skip this if there are no va_start calls.
3639 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3640 CallConv != CallingConv::X86_ThisCall)) {
3641 FuncInfo->setVarArgsFrameIndex(
3642 FrameInfo.CreateFixedObject(1, StackSize, true));
3643 }
3644
3645 // 64-bit calling conventions support varargs and register parameters, so we
3646 // have to do extra work to spill them in the prologue.
3647 if (is64Bit()) {
3648 // Find the first unallocated argument registers.
3649 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3650 ArrayRef<MCPhysReg> ArgXMMs =
3651 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3652 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3653 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3654
3655 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3656, __extension__ __PRETTY_FUNCTION__))
3656 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3656, __extension__ __PRETTY_FUNCTION__))
;
3657
3658 if (isWin64()) {
3659 // Get to the caller-allocated home save location. Add 8 to account
3660 // for the return address.
3661 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3662 FuncInfo->setRegSaveFrameIndex(
3663 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3664 // Fixup to set vararg frame on shadow area (4 x i64).
3665 if (NumIntRegs < 4)
3666 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3667 } else {
3668 // For X86-64, if there are vararg parameters that are passed via
3669 // registers, then we must store them to their spots on the stack so
3670 // they may be loaded by dereferencing the result of va_next.
3671 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3672 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3673 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3674 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3675 }
3676
3677 SmallVector<SDValue, 6>
3678 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3679 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3680 // keeping live input value
3681 SDValue ALVal; // if applicable keeps SDValue for %al register
3682
3683 // Gather all the live in physical registers.
3684 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3685 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3686 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3687 }
3688 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3689 if (!AvailableXmms.empty()) {
3690 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3691 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3692 for (MCPhysReg Reg : AvailableXmms) {
3693 // FastRegisterAllocator spills virtual registers at basic
3694 // block boundary. That leads to usages of xmm registers
3695 // outside of check for %al. Pass physical registers to
3696 // VASTART_SAVE_XMM_REGS to avoid unneccessary spilling.
3697 TheMachineFunction.getRegInfo().addLiveIn(Reg);
3698 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
3699 }
3700 }
3701
3702 // Store the integer parameter registers.
3703 SmallVector<SDValue, 8> MemOps;
3704 SDValue RSFIN =
3705 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3706 TargLowering.getPointerTy(DAG.getDataLayout()));
3707 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3708 for (SDValue Val : LiveGPRs) {
3709 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3710 TargLowering.getPointerTy(DAG.getDataLayout()),
3711 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3712 SDValue Store =
3713 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3714 MachinePointerInfo::getFixedStack(
3715 DAG.getMachineFunction(),
3716 FuncInfo->getRegSaveFrameIndex(), Offset));
3717 MemOps.push_back(Store);
3718 Offset += 8;
3719 }
3720
3721 // Now store the XMM (fp + vector) parameter registers.
3722 if (!LiveXMMRegs.empty()) {
3723 SmallVector<SDValue, 12> SaveXMMOps;
3724 SaveXMMOps.push_back(Chain);
3725 SaveXMMOps.push_back(ALVal);
3726 SaveXMMOps.push_back(
3727 DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
3728 SaveXMMOps.push_back(
3729 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
3730 llvm::append_range(SaveXMMOps, LiveXMMRegs);
3731 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
3732 MVT::Other, SaveXMMOps));
3733 }
3734
3735 if (!MemOps.empty())
3736 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3737 }
3738}
3739
3740void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3741 // Find the largest legal vector type.
3742 MVT VecVT = MVT::Other;
3743 // FIXME: Only some x86_32 calling conventions support AVX512.
3744 if (Subtarget.useAVX512Regs() &&
3745 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3746 CallConv == CallingConv::Intel_OCL_BI)))
3747 VecVT = MVT::v16f32;
3748 else if (Subtarget.hasAVX())
3749 VecVT = MVT::v8f32;
3750 else if (Subtarget.hasSSE2())
3751 VecVT = MVT::v4f32;
3752
3753 // We forward some GPRs and some vector types.
3754 SmallVector<MVT, 2> RegParmTypes;
3755 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3756 RegParmTypes.push_back(IntVT);
3757 if (VecVT != MVT::Other)
3758 RegParmTypes.push_back(VecVT);
3759
3760 // Compute the set of forwarded registers. The rest are scratch.
3761 SmallVectorImpl<ForwardedRegister> &Forwards =
3762 FuncInfo->getForwardedMustTailRegParms();
3763 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3764
3765 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3766 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3767 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3768 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3769 }
3770
3771 // Copy all forwards from physical to virtual registers.
3772 for (ForwardedRegister &FR : Forwards) {
3773 // FIXME: Can we use a less constrained schedule?
3774 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3775 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
3776 TargLowering.getRegClassFor(FR.VT));
3777 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
3778 }
3779}
3780
3781void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
3782 unsigned StackSize) {
3783 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
3784 // If necessary, it would be set into the correct value later.
3785 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3786 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3787
3788 if (FrameInfo.hasVAStart())
3789 createVarArgAreaAndStoreRegisters(Chain, StackSize);
3790
3791 if (FrameInfo.hasMustTailInVarArgFunc())
3792 forwardMustTailParameters(Chain);
3793}
3794
3795SDValue X86TargetLowering::LowerFormalArguments(
3796 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3797 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3798 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3799 MachineFunction &MF = DAG.getMachineFunction();
3800 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3801
3802 const Function &F = MF.getFunction();
3803 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3804 F.getName() == "main")
3805 FuncInfo->setForceFramePointer(true);
3806
3807 MachineFrameInfo &MFI = MF.getFrameInfo();
3808 bool Is64Bit = Subtarget.is64Bit();
3809 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3810
3811 assert((static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3813, __extension__ __PRETTY_FUNCTION__))
3812 !(IsVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3813, __extension__ __PRETTY_FUNCTION__))
3813 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")(static_cast <bool> (!(IsVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3813, __extension__ __PRETTY_FUNCTION__))
;
3814
3815 // Assign locations to all of the incoming arguments.
3816 SmallVector<CCValAssign, 16> ArgLocs;
3817 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3818
3819 // Allocate shadow area for Win64.
3820 if (IsWin64)
3821 CCInfo.AllocateStack(32, Align(8));
3822
3823 CCInfo.AnalyzeArguments(Ins, CC_X86);
3824
3825 // In vectorcall calling convention a second pass is required for the HVA
3826 // types.
3827 if (CallingConv::X86_VectorCall == CallConv) {
3828 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3829 }
3830
3831 // The next loop assumes that the locations are in the same order of the
3832 // input arguments.
3833 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3834, __extension__ __PRETTY_FUNCTION__))
3834 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3834, __extension__ __PRETTY_FUNCTION__))
;
3835
3836 SDValue ArgValue;
3837 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3838 ++I, ++InsIndex) {
3839 assert(InsIndex < Ins.size() && "Invalid Ins index")(static_cast <bool> (InsIndex < Ins.size() &&
"Invalid Ins index") ? void (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3839, __extension__ __PRETTY_FUNCTION__))
;
3840 CCValAssign &VA = ArgLocs[I];
3841
3842 if (VA.isRegLoc()) {
3843 EVT RegVT = VA.getLocVT();
3844 if (VA.needsCustom()) {
3845 assert((static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3847, __extension__ __PRETTY_FUNCTION__))
3846 VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3847, __extension__ __PRETTY_FUNCTION__))
3847 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3847, __extension__ __PRETTY_FUNCTION__))
;
3848
3849 // v64i1 values, in regcall calling convention, that are
3850 // compiled to 32 bit arch, are split up into two registers.
3851 ArgValue =
3852 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3853 } else {
3854 const TargetRegisterClass *RC;
3855 if (RegVT == MVT::i8)
3856 RC = &X86::GR8RegClass;
3857 else if (RegVT == MVT::i16)
3858 RC = &X86::GR16RegClass;
3859 else if (RegVT == MVT::i32)
3860 RC = &X86::GR32RegClass;
3861 else if (Is64Bit && RegVT == MVT::i64)
3862 RC = &X86::GR64RegClass;
3863 else if (RegVT == MVT::f16)
3864 RC = &X86::FR16XRegClass;
3865 else if (RegVT == MVT::f32)
3866 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3867 else if (RegVT == MVT::f64)
3868 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3869 else if (RegVT == MVT::f80)
3870 RC = &X86::RFP80RegClass;
3871 else if (RegVT == MVT::f128)
3872 RC = &X86::VR128RegClass;
3873 else if (RegVT.is512BitVector())
3874 RC = &X86::VR512RegClass;
3875 else if (RegVT.is256BitVector())
3876 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3877 else if (RegVT.is128BitVector())
3878 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3879 else if (RegVT == MVT::x86mmx)
3880 RC = &X86::VR64RegClass;
3881 else if (RegVT == MVT::v1i1)
3882 RC = &X86::VK1RegClass;
3883 else if (RegVT == MVT::v8i1)
3884 RC = &X86::VK8RegClass;
3885 else if (RegVT == MVT::v16i1)
3886 RC = &X86::VK16RegClass;
3887 else if (RegVT == MVT::v32i1)
3888 RC = &X86::VK32RegClass;
3889 else if (RegVT == MVT::v64i1)
3890 RC = &X86::VK64RegClass;
3891 else
3892 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3892)
;
3893
3894 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3895 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3896 }
3897
3898 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3899 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3900 // right size.
3901 if (VA.getLocInfo() == CCValAssign::SExt)
3902 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3903 DAG.getValueType(VA.getValVT()));
3904 else if (VA.getLocInfo() == CCValAssign::ZExt)
3905 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3906 DAG.getValueType(VA.getValVT()));
3907 else if (VA.getLocInfo() == CCValAssign::BCvt)
3908 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3909
3910 if (VA.isExtInLoc()) {
3911 // Handle MMX values passed in XMM regs.
3912 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3913 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3914 else if (VA.getValVT().isVector() &&
3915 VA.getValVT().getScalarType() == MVT::i1 &&
3916 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3917 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3918 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3919 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3920 } else
3921 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3922 }
3923 } else {
3924 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3924, __extension__ __PRETTY_FUNCTION__))
;
3925 ArgValue =
3926 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3927 }
3928
3929 // If value is passed via pointer - do a load.
3930 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3931 ArgValue =
3932 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3933
3934 InVals.push_back(ArgValue);
3935 }
3936
3937 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3938 if (Ins[I].Flags.isSwiftAsync()) {
3939 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
3940 if (Subtarget.is64Bit())
3941 X86FI->setHasSwiftAsyncContext(true);
3942 else {
3943 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
3944 X86FI->setSwiftAsyncContextFrameIdx(FI);
3945 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
3946 DAG.getFrameIndex(FI, MVT::i32),
3947 MachinePointerInfo::getFixedStack(MF, FI));
3948 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
3949 }
3950 }
3951
3952 // Swift calling convention does not require we copy the sret argument
3953 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3954 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
3955 continue;
3956
3957 // All x86 ABIs require that for returning structs by value we copy the
3958 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3959 // the argument into a virtual register so that we can access it from the
3960 // return points.
3961 if (Ins[I].Flags.isSRet()) {
3962 assert(!FuncInfo->getSRetReturnReg() &&(static_cast <bool> (!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set") ? void (0) : __assert_fail
("!FuncInfo->getSRetReturnReg() && \"SRet return has already been set\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3963, __extension__ __PRETTY_FUNCTION__))
3963 "SRet return has already been set")(static_cast <bool> (!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set") ? void (0) : __assert_fail
("!FuncInfo->getSRetReturnReg() && \"SRet return has already been set\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3963, __extension__ __PRETTY_FUNCTION__))
;
3964 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3965 Register Reg =
3966 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3967 FuncInfo->setSRetReturnReg(Reg);
3968 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3969 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3970 break;
3971 }
3972 }
3973
3974 unsigned StackSize = CCInfo.getNextStackOffset();
3975 // Align stack specially for tail calls.
3976 if (shouldGuaranteeTCO(CallConv,
3977 MF.getTarget().Options.GuaranteedTailCallOpt))
3978 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3979
3980 if (IsVarArg)
3981 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
3982 .lowerVarArgsParameters(Chain, StackSize);
3983
3984 // Some CCs need callee pop.
3985 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
3986 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3987 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3988 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3989 // X86 interrupts must pop the error code (and the alignment padding) if
3990 // present.
3991 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3992 } else {
3993 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3994 // If this is an sret function, the return should pop the hidden pointer.
3995 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3996 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3997 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3998 FuncInfo->setBytesToPopOnReturn(4);
3999 }
4000
4001 if (!Is64Bit) {
4002 // RegSaveFrameIndex is X86-64 only.
4003 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4004 }
4005
4006 FuncInfo->setArgumentStackSize(StackSize);
4007
4008 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
4009 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
4010 if (Personality == EHPersonality::CoreCLR) {
4011 assert(Is64Bit)(static_cast <bool> (Is64Bit) ? void (0) : __assert_fail
("Is64Bit", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4011, __extension__ __PRETTY_FUNCTION__))
;
4012 // TODO: Add a mechanism to frame lowering that will allow us to indicate
4013 // that we'd prefer this slot be allocated towards the bottom of the frame
4014 // (i.e. near the stack pointer after allocating the frame). Every
4015 // funclet needs a copy of this slot in its (mostly empty) frame, and the
4016 // offset from the bottom of this and each funclet's frame must be the
4017 // same, so the size of funclets' (mostly empty) frames is dictated by
4018 // how far this slot is from the bottom (since they allocate just enough
4019 // space to accommodate holding this slot at the correct offset).
4020 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
4021 EHInfo->PSPSymFrameIdx = PSPSymFI;
4022 }
4023 }
4024
4025 if (CallConv == CallingConv::X86_RegCall ||
4026 F.hasFnAttribute("no_caller_saved_registers")) {
4027 MachineRegisterInfo &MRI = MF.getRegInfo();
4028 for (std::pair<Register, Register> Pair : MRI.liveins())
4029 MRI.disableCalleeSavedRegister(Pair.first);
4030 }
4031
4032 return Chain;
4033}
4034
4035SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
4036 SDValue Arg, const SDLoc &dl,
4037 SelectionDAG &DAG,
4038 const CCValAssign &VA,
4039 ISD::ArgFlagsTy Flags,
4040 bool isByVal) const {
4041 unsigned LocMemOffset = VA.getLocMemOffset();
4042 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4043 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4044 StackPtr, PtrOff);
4045 if (isByVal)
4046 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
4047
4048 return DAG.getStore(
4049 Chain, dl, Arg, PtrOff,
4050 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
4051}
4052
4053/// Emit a load of return address if tail call
4054/// optimization is performed and it is required.
4055SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
4056 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
4057 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
4058 // Adjust the Return address stack slot.
4059 EVT VT = getPointerTy(DAG.getDataLayout());
4060 OutRetAddr = getReturnAddressFrameIndex(DAG);
4061
4062 // Load the "old" Return address.
4063 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
4064 return SDValue(OutRetAddr.getNode(), 1);
4065}
4066
4067/// Emit a store of the return address if tail call
4068/// optimization is performed and it is required (FPDiff!=0).
4069static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
4070 SDValue Chain, SDValue RetAddrFrIdx,
4071 EVT PtrVT, unsigned SlotSize,
4072 int FPDiff, const SDLoc &dl) {
4073 // Store the return address to the appropriate stack slot.
4074 if (!FPDiff) return Chain;
4075 // Calculate the new stack slot for the return address.
4076 int NewReturnAddrFI =
4077 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
4078 false);
4079 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
4080 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
4081 MachinePointerInfo::getFixedStack(
4082 DAG.getMachineFunction(), NewReturnAddrFI));
4083 return Chain;
4084}
4085
4086/// Returns a vector_shuffle mask for an movs{s|d}, movd
4087/// operation of specified width.
4088static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
4089 SDValue V2) {
4090 unsigned NumElems = VT.getVectorNumElements();
4091 SmallVector<int, 8> Mask;
4092 Mask.push_back(NumElems);
4093 for (unsigned i = 1; i != NumElems; ++i)
4094 Mask.push_back(i);
4095 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
4096}
4097
4098SDValue
4099X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4100 SmallVectorImpl<SDValue> &InVals) const {
4101 SelectionDAG &DAG = CLI.DAG;
4102 SDLoc &dl = CLI.DL;
4103 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4104 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4105 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4106 SDValue Chain = CLI.Chain;
4107 SDValue Callee = CLI.Callee;
4108 CallingConv::ID CallConv = CLI.CallConv;
4109 bool &isTailCall = CLI.IsTailCall;
4110 bool isVarArg = CLI.IsVarArg;
4111 const auto *CB = CLI.CB;
4112
4113 MachineFunction &MF = DAG.getMachineFunction();
4114 bool Is64Bit = Subtarget.is64Bit();
4115 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4116 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
4117 bool IsSibcall = false;
4118 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
4119 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
4120 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
4121 bool HasNCSR = (CB && isa<CallInst>(CB) &&
4122 CB->hasFnAttr("no_caller_saved_registers"));
4123 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
4124 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
4125 const Module *M = MF.getMMI().getModule();
4126 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
4127
4128 MachineFunction::CallSiteInfo CSInfo;
4129 if (CallConv == CallingConv::X86_INTR)
4130 report_fatal_error("X86 interrupts may not be called directly");
4131
4132 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
4133 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
4134 // If we are using a GOT, disable tail calls to external symbols with
4135 // default visibility. Tail calling such a symbol requires using a GOT
4136 // relocation, which forces early binding of the symbol. This breaks code
4137 // that require lazy function symbol resolution. Using musttail or
4138 // GuaranteedTailCallOpt will override this.
4139 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4140 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
4141 G->getGlobal()->hasDefaultVisibility()))
4142 isTailCall = false;
4143 }
4144
4145
4146 if (isTailCall && !IsMustTail) {
4147 // Check if it's really possible to do a tail call.
4148 isTailCall = IsEligibleForTailCallOptimization(
4149 Callee, CallConv, SR == StackStructReturn, isVarArg, CLI.RetTy, Outs,
4150 OutVals, Ins, DAG);
4151
4152 // Sibcalls are automatically detected tailcalls which do not require
4153 // ABI changes.
4154 if (!IsGuaranteeTCO && isTailCall)
4155 IsSibcall = true;
4156
4157 if (isTailCall)
4158 ++NumTailCalls;
4159 }
4160
4161 if (IsMustTail && !isTailCall)
4162 report_fatal_error("failed to perform tail call elimination on a call "
4163 "site marked musttail");
4164
4165 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4166, __extension__ __PRETTY_FUNCTION__))
4166 "Var args not supported with calling convention fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4166, __extension__ __PRETTY_FUNCTION__))
;
4167
4168 // Analyze operands of the call, assigning locations to each operand.
4169 SmallVector<CCValAssign, 16> ArgLocs;
4170 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4171
4172 // Allocate shadow area for Win64.
4173 if (IsWin64)
4174 CCInfo.AllocateStack(32, Align(8));
4175
4176 CCInfo.AnalyzeArguments(Outs, CC_X86);
4177
4178 // In vectorcall calling convention a second pass is required for the HVA
4179 // types.
4180 if (CallingConv::X86_VectorCall == CallConv) {
4181 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
4182 }
4183
4184 // Get a count of how many bytes are to be pushed on the stack.
4185 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
4186 if (IsSibcall)
4187 // This is a sibcall. The memory operands are available in caller's
4188 // own caller's stack.
4189 NumBytes = 0;
4190 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
4191 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
4192
4193 int FPDiff = 0;
4194 if (isTailCall &&
4195 shouldGuaranteeTCO(CallConv,
4196 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4197 // Lower arguments at fp - stackoffset + fpdiff.
4198 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
4199
4200 FPDiff = NumBytesCallerPushed - NumBytes;
4201
4202 // Set the delta of movement of the returnaddr stackslot.
4203 // But only set if delta is greater than previous delta.
4204 if (FPDiff < X86Info->getTCReturnAddrDelta())
4205 X86Info->setTCReturnAddrDelta(FPDiff);
4206 }
4207
4208 unsigned NumBytesToPush = NumBytes;
4209 unsigned NumBytesToPop = NumBytes;
4210
4211 // If we have an inalloca argument, all stack space has already been allocated
4212 // for us and be right at the top of the stack. We don't support multiple
4213 // arguments passed in memory when using inalloca.
4214 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4215 NumBytesToPush = 0;
4216 if (!ArgLocs.back().isMemLoc())
4217 report_fatal_error("cannot use inalloca attribute on a register "
4218 "parameter");
4219 if (ArgLocs.back().getLocMemOffset() != 0)
4220 report_fatal_error("any parameter with the inalloca attribute must be "
4221 "the only memory argument");
4222 } else if (CLI.IsPreallocated) {
4223 assert(ArgLocs.back().isMemLoc() &&(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4225, __extension__ __PRETTY_FUNCTION__))
4224 "cannot use preallocated attribute on a register "(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4225, __extension__ __PRETTY_FUNCTION__))
4225 "parameter")(static_cast <bool> (ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register " "parameter"
) ? void (0) : __assert_fail ("ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4225, __extension__ __PRETTY_FUNCTION__))
;
4226 SmallVector<size_t, 4> PreallocatedOffsets;
4227 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4228 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4229 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4230 }
4231 }
4232 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4233 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4234 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4235 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4236 NumBytesToPush = 0;
4237 }
4238
4239 if (!IsSibcall && !IsMustTail)
4240 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4241 NumBytes - NumBytesToPush, dl);
4242
4243 SDValue RetAddrFrIdx;
4244 // Load return address for tail calls.
4245 if (isTailCall && FPDiff)
4246 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4247 Is64Bit, FPDiff, dl);
4248
4249 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4250 SmallVector<SDValue, 8> MemOpChains;
4251 SDValue StackPtr;
4252
4253 // The next loop assumes that the locations are in the same order of the
4254 // input arguments.
4255 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4256, __extension__ __PRETTY_FUNCTION__))
4256 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4256, __extension__ __PRETTY_FUNCTION__))
;
4257
4258 // Walk the register/memloc assignments, inserting copies/loads. In the case
4259 // of tail call optimization, arguments are handled later.
4260 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4261 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4262 ++I, ++OutIndex) {
4263 assert(OutIndex < Outs.size() && "Invalid Out index")(static_cast <bool> (OutIndex < Outs.size() &&
"Invalid Out index") ? void (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4263, __extension__ __PRETTY_FUNCTION__))
;
4264 // Skip inalloca/preallocated arguments, they have already been written.
4265 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4266 if (Flags.isInAlloca() || Flags.isPreallocated())
4267 continue;
4268
4269 CCValAssign &VA = ArgLocs[I];
4270 EVT RegVT = VA.getLocVT();
4271 SDValue Arg = OutVals[OutIndex];
4272 bool isByVal = Flags.isByVal();
4273
4274 // Promote the value if needed.
4275 switch (VA.getLocInfo()) {
4276 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4276)
;
4277 case CCValAssign::Full: break;
4278 case CCValAssign::SExt:
4279 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4280 break;
4281 case CCValAssign::ZExt:
4282 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4283 break;
4284 case CCValAssign::AExt:
4285 if (Arg.getValueType().isVector() &&
4286 Arg.getValueType().getVectorElementType() == MVT::i1)
4287 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4288 else if (RegVT.is128BitVector()) {
4289 // Special case: passing MMX values in XMM registers.
4290 Arg = DAG.getBitcast(MVT::i64, Arg);
4291 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4292 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4293 } else
4294 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4295 break;
4296 case CCValAssign::BCvt:
4297 Arg = DAG.getBitcast(RegVT, Arg);
4298 break;
4299 case CCValAssign::Indirect: {
4300 if (isByVal) {
4301 // Memcpy the argument to a temporary stack slot to prevent
4302 // the caller from seeing any modifications the callee may make
4303 // as guaranteed by the `byval` attribute.
4304 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4305 Flags.getByValSize(),
4306 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4307 SDValue StackSlot =
4308 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4309 Chain =
4310 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4311 // From now on treat this as a regular pointer
4312 Arg = StackSlot;
4313 isByVal = false;
4314 } else {
4315 // Store the argument.
4316 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4317 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4318 Chain = DAG.getStore(
4319 Chain, dl, Arg, SpillSlot,
4320 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4321 Arg = SpillSlot;
4322 }
4323 break;
4324 }
4325 }
4326
4327 if (VA.needsCustom()) {
4328 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4329, __extension__ __PRETTY_FUNCTION__))
4329 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4329, __extension__ __PRETTY_FUNCTION__))
;
4330 // Split v64i1 value into two registers
4331 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4332 } else if (VA.isRegLoc()) {
4333 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4334 const TargetOptions &Options = DAG.getTarget().Options;
4335 if (Options.EmitCallSiteInfo)
4336 CSInfo.emplace_back(VA.getLocReg(), I);
4337 if (isVarArg && IsWin64) {
4338 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4339 // shadow reg if callee is a varargs function.
4340 Register ShadowReg;
4341 switch (VA.getLocReg()) {
4342 case X86::XMM0: ShadowReg = X86::RCX; break;
4343 case X86::XMM1: ShadowReg = X86::RDX; break;
4344 case X86::XMM2: ShadowReg = X86::R8; break;
4345 case X86::XMM3: ShadowReg = X86::R9; break;
4346 }
4347 if (ShadowReg)
4348 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4349 }
4350 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4351 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4351, __extension__ __PRETTY_FUNCTION__))
;
4352 if (!StackPtr.getNode())
4353 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4354 getPointerTy(DAG.getDataLayout()));
4355 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4356 dl, DAG, VA, Flags, isByVal));
4357 }
4358 }
4359
4360 if (!MemOpChains.empty())
4361 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4362
4363 if (Subtarget.isPICStyleGOT()) {
4364 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4365 // GOT pointer (except regcall).
4366 if (!isTailCall) {
4367 // Indirect call with RegCall calling convention may use up all the
4368 // general registers, so it is not suitable to bind EBX register for
4369 // GOT address, just let register allocator handle it.
4370 if (CallConv != CallingConv::X86_RegCall)
4371 RegsToPass.push_back(std::make_pair(
4372 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4373 getPointerTy(DAG.getDataLayout()))));
4374 } else {
4375 // If we are tail calling and generating PIC/GOT style code load the
4376 // address of the callee into ECX. The value in ecx is used as target of
4377 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4378 // for tail calls on PIC/GOT architectures. Normally we would just put the
4379 // address of GOT into ebx and then call target@PLT. But for tail calls
4380 // ebx would be restored (since ebx is callee saved) before jumping to the
4381 // target@PLT.
4382
4383 // Note: The actual moving to ECX is done further down.
4384 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4385 if (G && !G->getGlobal()->hasLocalLinkage() &&
4386 G->getGlobal()->hasDefaultVisibility())
4387 Callee = LowerGlobalAddress(Callee, DAG);
4388 else if (isa<ExternalSymbolSDNode>(Callee))
4389 Callee = LowerExternalSymbol(Callee, DAG);
4390 }
4391 }
4392
4393 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
4394 // From AMD64 ABI document:
4395 // For calls that may call functions that use varargs or stdargs
4396 // (prototype-less calls or calls to functions containing ellipsis (...) in
4397 // the declaration) %al is used as hidden argument to specify the number
4398 // of SSE registers used. The contents of %al do not need to match exactly
4399 // the number of registers, but must be an ubound on the number of SSE
4400 // registers used and is in the range 0 - 8 inclusive.
4401
4402 // Count the number of XMM registers allocated.
4403 static const MCPhysReg XMMArgRegs[] = {
4404 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4405 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4406 };
4407 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4408 assert((Subtarget.hasSSE1() || !NumXMMRegs)(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4409, __extension__ __PRETTY_FUNCTION__))
4409 && "SSE registers cannot be used when SSE is disabled")(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4409, __extension__ __PRETTY_FUNCTION__))
;
4410 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4411 DAG.getConstant(NumXMMRegs, dl,
4412 MVT::i8)));
4413 }
4414
4415 if (isVarArg && IsMustTail) {
4416 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4417 for (const auto &F : Forwards) {
4418 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4419 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4420 }
4421 }
4422
4423 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4424 // don't need this because the eligibility check rejects calls that require
4425 // shuffling arguments passed in memory.
4426 if (!IsSibcall && isTailCall) {
4427 // Force all the incoming stack arguments to be loaded from the stack
4428 // before any new outgoing arguments are stored to the stack, because the
4429 // outgoing stack slots may alias the incoming argument stack slots, and
4430 // the alias isn't otherwise explicit. This is slightly more conservative
4431 // than necessary, because it means that each store effectively depends
4432 // on every argument instead of just those arguments it would clobber.
4433 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4434
4435 SmallVector<SDValue, 8> MemOpChains2;
4436 SDValue FIN;
4437 int FI = 0;
4438 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4439 ++I, ++OutsIndex) {
4440 CCValAssign &VA = ArgLocs[I];
4441
4442 if (VA.isRegLoc()) {
4443 if (VA.needsCustom()) {
4444 assert((CallConv == CallingConv::X86_RegCall) &&(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4445, __extension__ __PRETTY_FUNCTION__))
4445 "Expecting custom case only in regcall calling convention")(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4445, __extension__ __PRETTY_FUNCTION__))
;
4446 // This means that we are in special case where one argument was
4447 // passed through two register locations - Skip the next location
4448 ++I;
4449 }
4450
4451 continue;
4452 }
4453
4454 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4454, __extension__ __PRETTY_FUNCTION__))
;
4455 SDValue Arg = OutVals[OutsIndex];
4456 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4457 // Skip inalloca/preallocated arguments. They don't require any work.
4458 if (Flags.isInAlloca() || Flags.isPreallocated())
4459 continue;
4460 // Create frame index.
4461 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4462 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4463 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4464 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4465
4466 if (Flags.isByVal()) {
4467 // Copy relative to framepointer.
4468 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4469 if (!StackPtr.getNode())
4470 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4471 getPointerTy(DAG.getDataLayout()));
4472 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4473 StackPtr, Source);
4474
4475 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4476 ArgChain,
4477 Flags, DAG, dl));
4478 } else {
4479 // Store relative to framepointer.
4480 MemOpChains2.push_back(DAG.getStore(
4481 ArgChain, dl, Arg, FIN,
4482 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4483 }
4484 }
4485
4486 if (!MemOpChains2.empty())
4487 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4488
4489 // Store the return address to the appropriate stack slot.
4490 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4491 getPointerTy(DAG.getDataLayout()),
4492 RegInfo->getSlotSize(), FPDiff, dl);
4493 }
4494
4495 // Build a sequence of copy-to-reg nodes chained together with token chain
4496 // and flag operands which copy the outgoing args into registers.
4497 SDValue InFlag;
4498 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4499 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4500 RegsToPass[i].second, InFlag);
4501 InFlag = Chain.getValue(1);
4502 }
4503
4504 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4505 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")(static_cast <bool> (Is64Bit && "Large code model is only legal in 64-bit mode."
) ? void (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4505, __extension__ __PRETTY_FUNCTION__))
;
4506 // In the 64-bit large code model, we have to make all calls
4507 // through a register, since the call instruction's 32-bit
4508 // pc-relative offset may not be large enough to hold the whole
4509 // address.
4510 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4511 Callee->getOpcode() == ISD::ExternalSymbol) {
4512 // Lower direct calls to global addresses and external symbols. Setting
4513 // ForCall to true here has the effect of removing WrapperRIP when possible
4514 // to allow direct calls to be selected without first materializing the
4515 // address into a register.
4516 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4517 } else if (Subtarget.isTarget64BitILP32() &&
4518 Callee->getValueType(0) == MVT::i32) {
4519 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4520 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4521 }
4522
4523 // Returns a chain & a flag for retval copy to use.
4524 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4525 SmallVector<SDValue, 8> Ops;
4526
4527 if (!IsSibcall && isTailCall && !IsMustTail) {
4528 Chain = DAG.getCALLSEQ_END(Chain,
4529 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4530 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4531 InFlag = Chain.getValue(1);
4532 }
4533
4534 Ops.push_back(Chain);
4535 Ops.push_back(Callee);
4536
4537 if (isTailCall)
4538 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4539
4540 // Add argument registers to the end of the list so that they are known live
4541 // into the call.
4542 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4543 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4544 RegsToPass[i].second.getValueType()));
4545
4546 // Add a register mask operand representing the call-preserved registers.
4547 const uint32_t *Mask = [&]() {
4548 auto AdaptedCC = CallConv;
4549 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
4550 // use X86_INTR calling convention because it has the same CSR mask
4551 // (same preserved registers).
4552 if (HasNCSR)
4553 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
4554 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
4555 // to use the CSR_NoRegs_RegMask.
4556 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
4557 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
4558 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
4559 }();
4560 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4560, __extension__ __PRETTY_FUNCTION__))
;
4561
4562 // If this is an invoke in a 32-bit function using a funclet-based
4563 // personality, assume the function clobbers all registers. If an exception
4564 // is thrown, the runtime will not restore CSRs.
4565 // FIXME: Model this more precisely so that we can register allocate across
4566 // the normal edge and spill and fill across the exceptional edge.
4567 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4568 const Function &CallerFn = MF.getFunction();
4569 EHPersonality Pers =
4570 CallerFn.hasPersonalityFn()
4571 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4572 : EHPersonality::Unknown;
4573 if (isFuncletEHPersonality(Pers))
4574 Mask = RegInfo->getNoPreservedMask();
4575 }
4576
4577 // Define a new register mask from the existing mask.
4578 uint32_t *RegMask = nullptr;
4579
4580 // In some calling conventions we need to remove the used physical registers
4581 // from the reg mask.
4582 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4583 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4584
4585 // Allocate a new Reg Mask and copy Mask.
4586 RegMask = MF.allocateRegMask();
4587 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4588 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4589
4590 // Make sure all sub registers of the argument registers are reset
4591 // in the RegMask.
4592 for (auto const &RegPair : RegsToPass)
4593 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4594 SubRegs.isValid(); ++SubRegs)
4595 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4596
4597 // Create the RegMask Operand according to our updated mask.
4598 Ops.push_back(DAG.getRegisterMask(RegMask));
4599 } else {
4600 // Create the RegMask Operand according to the static mask.
4601 Ops.push_back(DAG.getRegisterMask(Mask));
4602 }
4603
4604 if (InFlag.getNode())
4605 Ops.push_back(InFlag);
4606
4607 if (isTailCall) {
4608 // We used to do:
4609 //// If this is the first return lowered for this function, add the regs
4610 //// to the liveout set for the function.
4611 // This isn't right, although it's probably harmless on x86; liveouts
4612 // should be computed from returns not tail calls. Consider a void
4613 // function making a tail call to a function returning int.
4614 MF.getFrameInfo().setHasTailCall();
4615 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4616 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4617 return Ret;
4618 }
4619
4620 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4621 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4622 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4623 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
4624 // expanded to the call, directly followed by a special marker sequence and
4625 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
4626 assert(!isTailCall &&(static_cast <bool> (!isTailCall && "tail calls cannot be marked with clang.arc.attachedcall"
) ? void (0) : __assert_fail ("!isTailCall && \"tail calls cannot be marked with clang.arc.attachedcall\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4627, __extension__ __PRETTY_FUNCTION__))
4627 "tail calls cannot be marked with clang.arc.attachedcall")(static_cast <bool> (!isTailCall && "tail calls cannot be marked with clang.arc.attachedcall"
) ? void (0) : __assert_fail ("!isTailCall && \"tail calls cannot be marked with clang.arc.attachedcall\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4627, __extension__ __PRETTY_FUNCTION__))
;
4628 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode")(static_cast <bool> (Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode"
) ? void (0) : __assert_fail ("Is64Bit && \"clang.arc.attachedcall is only supported in 64bit mode\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4628, __extension__ __PRETTY_FUNCTION__))
;
4629
4630 // Add a target global address for the retainRV/claimRV runtime function
4631 // just before the call target.
4632 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
4633 auto PtrVT = getPointerTy(DAG.getDataLayout());
4634 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
4635 Ops.insert(Ops.begin() + 1, GA);
4636 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
4637 } else {
4638 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4639 }
4640
4641 InFlag = Chain.getValue(1);
4642 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4643 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4644
4645 // Save heapallocsite metadata.
4646 if (CLI.CB)
4647 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4648 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4649
4650 // Create the CALLSEQ_END node.
4651 unsigned NumBytesForCalleeToPop;
4652 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4653 DAG.getTarget().Options.GuaranteedTailCallOpt))
4654 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4655 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4656 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4657 SR == StackStructReturn)
4658 // If this is a call to a struct-return function, the callee
4659 // pops the hidden struct pointer, so we have to push it back.
4660 // This is common for Darwin/X86, Linux & Mingw32 targets.
4661 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4662 NumBytesForCalleeToPop = 4;
4663 else
4664 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4665
4666 // Returns a flag for retval copy to use.
4667 if (!IsSibcall) {
4668 Chain = DAG.getCALLSEQ_END(Chain,
4669 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4670 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4671 true),
4672 InFlag, dl);
4673 InFlag = Chain.getValue(1);
4674 }
4675
4676 // Handle result values, copying them out of physregs into vregs that we
4677 // return.
4678 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4679 InVals, RegMask);
4680}
4681
4682//===----------------------------------------------------------------------===//
4683// Fast Calling Convention (tail call) implementation
4684//===----------------------------------------------------------------------===//
4685
4686// Like std call, callee cleans arguments, convention except that ECX is
4687// reserved for storing the tail called function address. Only 2 registers are
4688// free for argument passing (inreg). Tail call optimization is performed
4689// provided:
4690// * tailcallopt is enabled
4691// * caller/callee are fastcc
4692// On X86_64 architecture with GOT-style position independent code only local
4693// (within module) calls are supported at the moment.
4694// To keep the stack aligned according to platform abi the function
4695// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4696// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
4697// If a tail called function callee has more arguments than the caller the
4698// caller needs to make sure that there is room to move the RETADDR to. This is
4699// achieved by reserving an area the size of the argument delta right after the
4700// original RETADDR, but before the saved framepointer or the spilled registers
4701// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4702// stack layout:
4703// arg1
4704// arg2
4705// RETADDR
4706// [ new RETADDR
4707// move area ]
4708// (possible EBP)
4709// ESI
4710// EDI
4711// local1 ..
4712
4713/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4714/// requirement.
4715unsigned
4716X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4717 SelectionDAG &DAG) const {
4718 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4719 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4720 assert(StackSize % SlotSize == 0 &&(static_cast <bool> (StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize") ? void (0) : __assert_fail
("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4721, __extension__ __PRETTY_FUNCTION__))
4721 "StackSize must be a multiple of SlotSize")(static_cast <bool> (StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize") ? void (0) : __assert_fail
("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4721, __extension__ __PRETTY_FUNCTION__))
;
4722 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4723}
4724
4725/// Return true if the given stack call argument is already available in the
4726/// same position (relatively) of the caller's incoming argument stack.
4727static
4728bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4729 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4730 const X86InstrInfo *TII, const CCValAssign &VA) {
4731 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4732
4733 for (;;) {
4734 // Look through nodes that don't alter the bits of the incoming value.
4735 unsigned Op = Arg.getOpcode();
4736 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4737 Arg = Arg.getOperand(0);
4738 continue;
4739 }
4740 if (Op == ISD::TRUNCATE) {
4741 const SDValue &TruncInput = Arg.getOperand(0);
4742 if (TruncInput.getOpcode() == ISD::AssertZext &&
4743 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4744 Arg.getValueType()) {
4745 Arg = TruncInput.getOperand(0);
4746 continue;
4747 }
4748 }
4749 break;
4750 }
4751
4752 int FI = INT_MAX2147483647;
4753 if (Arg.getOpcode() == ISD::CopyFromReg) {
4754 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4755 if (!VR.isVirtual())
4756 return false;
4757 MachineInstr *Def = MRI->getVRegDef(VR);
4758 if (!Def)
4759 return false;
4760 if (!Flags.isByVal()) {
4761 if (!TII->isLoadFromStackSlot(*Def, FI))
4762 return false;
4763 } else {
4764 unsigned Opcode = Def->getOpcode();
4765 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4766 Opcode == X86::LEA64_32r) &&
4767 Def->getOperand(1).isFI()) {
4768 FI = Def->getOperand(1).getIndex();
4769 Bytes = Flags.getByValSize();
4770 } else
4771 return false;
4772 }
4773 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4774 if (Flags.isByVal())
4775 // ByVal argument is passed in as a pointer but it's now being
4776 // dereferenced. e.g.
4777 // define @foo(%struct.X* %A) {
4778 // tail call @bar(%struct.X* byval %A)
4779 // }
4780 return false;
4781 SDValue Ptr = Ld->getBasePtr();
4782 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4783 if (!FINode)
4784 return false;
4785 FI = FINode->getIndex();
4786 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4787 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4788 FI = FINode->getIndex();
4789 Bytes = Flags.getByValSize();
4790 } else
4791 return false;
4792
4793 assert(FI != INT_MAX)(static_cast <bool> (FI != 2147483647) ? void (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4793, __extension__ __PRETTY_FUNCTION__))
;
4794 if (!MFI.isFixedObjectIndex(FI))
4795 return false;
4796
4797 if (Offset != MFI.getObjectOffset(FI))
4798 return false;
4799
4800 // If this is not byval, check that the argument stack object is immutable.
4801 // inalloca and argument copy elision can create mutable argument stack
4802 // objects. Byval objects can be mutated, but a byval call intends to pass the
4803 // mutated memory.
4804 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4805 return false;
4806
4807 if (VA.getLocVT().getFixedSizeInBits() >
4808 Arg.getValueSizeInBits().getFixedSize()) {
4809 // If the argument location is wider than the argument type, check that any
4810 // extension flags match.
4811 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4812 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4813 return false;
4814 }
4815 }
4816
4817 return Bytes == MFI.getObjectSize(FI);
4818}
4819
4820/// Check whether the call is eligible for tail call optimization. Targets
4821/// that want to do tail call optimization should implement this function.
4822bool X86TargetLowering::IsEligibleForTailCallOptimization(
4823 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
4824 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
4825 const SmallVectorImpl<SDValue> &OutVals,
4826 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4827 if (!mayTailCallThisCC(CalleeCC))
4828 return false;
4829
4830 // If -tailcallopt is specified, make fastcc functions tail-callable.
4831 MachineFunction &MF = DAG.getMachineFunction();
4832 const Function &CallerF = MF.getFunction();
4833
4834 // If the function return type is x86_fp80 and the callee return type is not,
4835 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4836 // perform a tailcall optimization here.
4837 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4838 return false;
4839
4840 CallingConv::ID CallerCC = CallerF.getCallingConv();
4841 bool CCMatch = CallerCC == CalleeCC;
4842 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4843 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4844 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4845 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
4846
4847 // Win64 functions have extra shadow space for argument homing. Don't do the
4848 // sibcall if the caller and callee have mismatched expectations for this
4849 // space.
4850 if (IsCalleeWin64 != IsCallerWin64)
4851 return false;
4852
4853 if (IsGuaranteeTCO) {
4854 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4855 return true;
4856 return false;
4857 }
4858
4859 // Look for obvious safe cases to perform tail call optimization that do not
4860 // require ABI changes. This is what gcc calls sibcall.
4861
4862 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4863 // emit a special epilogue.
4864 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4865 if (RegInfo->hasStackRealignment(MF))
4866 return false;
4867
4868 // Also avoid sibcall optimization if we're an sret return fn and the callee
4869 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
4870 // insufficient.
4871 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
4872 // For a compatible tail call the callee must return our sret pointer. So it
4873 // needs to be (a) an sret function itself and (b) we pass our sret as its
4874 // sret. Condition #b is harder to determine.
4875 return false;
4876 } else if (Subtarget.is32Bit() && IsCalleeStackStructRet)
4877 // In the i686 ABI, the sret pointer is callee-pop, so we cannot tail-call,
4878 // as our caller doesn't expect that.
4879 return false;
4880
4881 // Do not sibcall optimize vararg calls unless all arguments are passed via
4882 // registers.
4883 LLVMContext &C = *DAG.getContext();
4884 if (isVarArg && !Outs.empty()) {
4885 // Optimizing for varargs on Win64 is unlikely to be safe without
4886 // additional testing.
4887 if (IsCalleeWin64 || IsCallerWin64)
4888 return false;
4889
4890 SmallVector<CCValAssign, 16> ArgLocs;
4891 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4892
4893 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4894 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4895 if (!ArgLocs[i].isRegLoc())
4896 return false;
4897 }
4898
4899 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4900 // stack. Therefore, if it's not used by the call it is not safe to optimize
4901 // this into a sibcall.
4902 bool Unused = false;
4903 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4904 if (!Ins[i].Used) {
4905 Unused = true;
4906 break;
4907 }
4908 }
4909 if (Unused) {
4910 SmallVector<CCValAssign, 16> RVLocs;
4911 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4912 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4913 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4914 CCValAssign &VA = RVLocs[i];
4915 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4916 return false;
4917 }
4918 }
4919
4920 // Check that the call results are passed in the same way.
4921 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4922 RetCC_X86, RetCC_X86))
4923 return false;
4924 // The callee has to preserve all registers the caller needs to preserve.
4925 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4926 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4927 if (!CCMatch) {
4928 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4929 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4930 return false;
4931 }
4932
4933 unsigned StackArgsSize = 0;
4934
4935 // If the callee takes no arguments then go on to check the results of the
4936 // call.
4937 if (!Outs.empty()) {
4938 // Check if stack adjustment is needed. For now, do not do this if any
4939 // argument is passed on the stack.
4940 SmallVector<CCValAssign, 16> ArgLocs;
4941 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4942
4943 // Allocate shadow area for Win64
4944 if (IsCalleeWin64)
4945 CCInfo.AllocateStack(32, Align(8));
4946
4947 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4948 StackArgsSize = CCInfo.getNextStackOffset();
4949
4950 if (CCInfo.getNextStackOffset()) {
4951 // Check if the arguments are already laid out in the right way as
4952 // the caller's fixed stack objects.
4953 MachineFrameInfo &MFI = MF.getFrameInfo();
4954 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4955 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4956 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4957 CCValAssign &VA = ArgLocs[i];
4958 SDValue Arg = OutVals[i];
4959 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4960 if (VA.getLocInfo() == CCValAssign::Indirect)
4961 return false;
4962 if (!VA.isRegLoc()) {
4963 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4964 MFI, MRI, TII, VA))
4965 return false;
4966 }
4967 }
4968 }
4969
4970 bool PositionIndependent = isPositionIndependent();
4971 // If the tailcall address may be in a register, then make sure it's
4972 // possible to register allocate for it. In 32-bit, the call address can
4973 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4974 // callee-saved registers are restored. These happen to be the same
4975 // registers used to pass 'inreg' arguments so watch out for those.
4976 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4977 !isa<ExternalSymbolSDNode>(Callee)) ||
4978 PositionIndependent)) {
4979 unsigned NumInRegs = 0;
4980 // In PIC we need an extra register to formulate the address computation
4981 // for the callee.
4982 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4983
4984 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4985 CCValAssign &VA = ArgLocs[i];
4986 if (!VA.isRegLoc())
4987 continue;
4988 Register Reg = VA.getLocReg();
4989 switch (Reg) {
4990 default: break;
4991 case X86::EAX: case X86::EDX: case X86::ECX:
4992 if (++NumInRegs == MaxInRegs)
4993 return false;
4994 break;
4995 }
4996 }
4997 }
4998
4999 const MachineRegisterInfo &MRI = MF.getRegInfo();
5000 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5001 return false;
5002 }
5003
5004 bool CalleeWillPop =
5005 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
5006 MF.getTarget().Options.GuaranteedTailCallOpt);
5007
5008 if (unsigned BytesToPop =
5009 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
5010 // If we have bytes to pop, the callee must pop them.
5011 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
5012 if (!CalleePopMatches)
5013 return false;
5014 } else if (CalleeWillPop && StackArgsSize > 0) {
5015 // If we don't have bytes to pop, make sure the callee doesn't pop any.
5016 return false;
5017 }
5018
5019 return true;
5020}
5021
5022FastISel *
5023X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
5024 const TargetLibraryInfo *libInfo) const {
5025 return X86::createFastISel(funcInfo, libInfo);
5026}
5027
5028//===----------------------------------------------------------------------===//
5029// Other Lowering Hooks
5030//===----------------------------------------------------------------------===//
5031
5032static bool MayFoldLoad(SDValue Op, bool AssumeSingleUse = false) {
5033 return (AssumeSingleUse || Op.hasOneUse()) && ISD::isNormalLoad(Op.getNode());
5034}
5035
5036static bool MayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
5037 bool AssumeSingleUse = false) {
5038 if (!MayFoldLoad(Op, AssumeSingleUse))
5039 return false;
5040
5041 // We can not replace a wide volatile load with a broadcast-from-memory,
5042 // because that would narrow the load, which isn't legal for volatiles.
5043 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op.getNode());
5044 return !Ld->isVolatile() ||
5045 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
5046}
5047
5048static bool MayFoldIntoStore(SDValue Op) {
5049 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
5050}
5051
5052static bool MayFoldIntoZeroExtend(SDValue Op) {
5053 if (Op.hasOneUse()) {
5054 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
5055 return (ISD::ZERO_EXTEND == Opcode);
5056 }
5057 return false;
5058}
5059
5060static bool isTargetShuffle(unsigned Opcode) {
5061 switch(Opcode) {
5062 default: return false;
5063 case X86ISD::BLENDI:
5064 case X86ISD::PSHUFB:
5065 case X86ISD::PSHUFD:
5066 case X86ISD::PSHUFHW:
5067 case X86ISD::PSHUFLW:
5068 case X86ISD::SHUFP:
5069 case X86ISD::INSERTPS:
5070 case X86ISD::EXTRQI:
5071 case X86ISD::INSERTQI:
5072 case X86ISD::VALIGN:
5073 case X86ISD::PALIGNR:
5074 case X86ISD::VSHLDQ:
5075 case X86ISD::VSRLDQ:
5076 case X86ISD::MOVLHPS:
5077 case X86ISD::MOVHLPS:
5078 case X86ISD::MOVSHDUP:
5079 case X86ISD::MOVSLDUP:
5080 case X86ISD::MOVDDUP:
5081 case X86ISD::MOVSS:
5082 case X86ISD::MOVSD:
5083 case X86ISD::MOVSH:
5084 case X86ISD::UNPCKL:
5085 case X86ISD::UNPCKH:
5086 case X86ISD::VBROADCAST:
5087 case X86ISD::VPERMILPI:
5088 case X86ISD::VPERMILPV:
5089 case X86ISD::VPERM2X128:
5090 case X86ISD::SHUF128:
5091 case X86ISD::VPERMIL2:
5092 case X86ISD::VPERMI:
5093 case X86ISD::VPPERM:
5094 case X86ISD::VPERMV:
5095 case X86ISD::VPERMV3:
5096 case X86ISD::VZEXT_MOVL:
5097 return true;
5098 }
5099}
5100
5101static bool isTargetShuffleVariableMask(unsigned Opcode) {
5102 switch (Opcode) {
5103 default: return false;
5104 // Target Shuffles.
5105 case X86ISD::PSHUFB:
5106 case X86ISD::VPERMILPV:
5107 case X86ISD::VPERMIL2:
5108 case X86ISD::VPPERM:
5109 case X86ISD::VPERMV:
5110 case X86ISD::VPERMV3:
5111 return true;
5112 // 'Faux' Target Shuffles.
5113 case ISD::OR:
5114 case ISD::AND:
5115 case X86ISD::ANDNP:
5116 return true;
5117 }
5118}
5119
5120static bool isTargetShuffleSplat(SDValue Op) {
5121 unsigned Opcode = Op.getOpcode();
5122 if (Opcode == ISD::EXTRACT_SUBVECTOR)
5123 return isTargetShuffleSplat(Op.getOperand(0));
5124 return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
5125}
5126
5127SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
5128 MachineFunction &MF = DAG.getMachineFunction();
5129 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5130 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
5131 int ReturnAddrIndex = FuncInfo->getRAIndex();
5132
5133 if (ReturnAddrIndex == 0) {
5134 // Set up a frame object for the return address.
5135 unsigned SlotSize = RegInfo->getSlotSize();
5136 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
5137 -(int64_t)SlotSize,
5138 false);
5139 FuncInfo->setRAIndex(ReturnAddrIndex);
5140 }
5141
5142 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
5143}
5144
5145bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
5146 bool hasSymbolicDisplacement) {
5147 // Offset should fit into 32 bit immediate field.
5148 if (!isInt<32>(Offset))
5149 return false;
5150
5151 // If we don't have a symbolic displacement - we don't have any extra
5152 // restrictions.
5153 if (!hasSymbolicDisplacement)
5154 return true;
5155
5156 // FIXME: Some tweaks might be needed for medium code model.
5157 if (M != CodeModel::Small && M != CodeModel::Kernel)
5158 return false;
5159
5160 // For small code model we assume that latest object is 16MB before end of 31
5161 // bits boundary. We may also accept pretty large negative constants knowing
5162 // that all objects are in the positive half of address space.
5163 if (M == CodeModel::Small && Offset < 16*1024*1024)
5164 return true;
5165
5166 // For kernel code model we know that all object resist in the negative half
5167 // of 32bits address space. We may not accept negative offsets, since they may
5168 // be just off and we may accept pretty large positive ones.
5169 if (M == CodeModel::Kernel && Offset >= 0)
5170 return true;
5171
5172 return false;
5173}
5174
5175/// Determines whether the callee is required to pop its own arguments.
5176/// Callee pop is necessary to support tail calls.
5177bool X86::isCalleePop(CallingConv::ID CallingConv,
5178 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
5179 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
5180 // can guarantee TCO.
5181 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
5182 return true;
5183
5184 switch (CallingConv) {
5185 default:
5186 return false;
5187 case CallingConv::X86_StdCall:
5188 case CallingConv::X86_FastCall:
5189 case CallingConv::X86_ThisCall:
5190 case CallingConv::X86_VectorCall:
5191 return !is64Bit;
5192 }
5193}
5194
5195/// Return true if the condition is an signed comparison operation.
5196static bool isX86CCSigned(unsigned X86CC) {
5197 switch (X86CC) {
5198 default:
5199 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5199)
;
5200 case X86::COND_E:
5201 case X86::COND_NE:
5202 case X86::COND_B:
5203 case X86::COND_A:
5204 case X86::COND_BE:
5205 case X86::COND_AE:
5206 return false;
5207 case X86::COND_G:
5208 case X86::COND_GE:
5209 case X86::COND_L:
5210 case X86::COND_LE:
5211 return true;
5212 }
5213}
5214
5215static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
5216 switch (SetCCOpcode) {
5217 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5217)
;
5218 case ISD::SETEQ: return X86::COND_E;
5219 case ISD::SETGT: return X86::COND_G;
5220 case ISD::SETGE: return X86::COND_GE;
5221 case ISD::SETLT: return X86::COND_L;
5222 case ISD::SETLE: return X86::COND_LE;
5223 case ISD::SETNE: return X86::COND_NE;
5224 case ISD::SETULT: return X86::COND_B;
5225 case ISD::SETUGT: return X86::COND_A;
5226 case ISD::SETULE: return X86::COND_BE;
5227 case ISD::SETUGE: return X86::COND_AE;
5228 }
5229}
5230
5231/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
5232/// condition code, returning the condition code and the LHS/RHS of the
5233/// comparison to make.
5234static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
5235 bool isFP, SDValue &LHS, SDValue &RHS,
5236 SelectionDAG &DAG) {
5237 if (!isFP) {
5238 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5239 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
5240 // X > -1 -> X == 0, jump !sign.
5241 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5242 return X86::COND_NS;
5243 }
5244 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
5245 // X < 0 -> X == 0, jump on sign.
5246 return X86::COND_S;
5247 }
5248 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
5249 // X >= 0 -> X == 0, jump on !sign.
5250 return X86::COND_NS;
5251 }
5252 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5253 // X < 1 -> X <= 0
5254 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5255 return X86::COND_LE;
5256 }
5257 }
5258
5259 return TranslateIntegerX86CC(SetCCOpcode);
5260 }
5261
5262 // First determine if it is required or is profitable to flip the operands.
5263
5264 // If LHS is a foldable load, but RHS is not, flip the condition.
5265 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5266 !ISD::isNON_EXTLoad(RHS.getNode())) {
5267 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5268 std::swap(LHS, RHS);
5269 }
5270
5271 switch (SetCCOpcode) {
5272 default: break;
5273 case ISD::SETOLT:
5274 case ISD::SETOLE:
5275 case ISD::SETUGT:
5276 case ISD::SETUGE:
5277 std::swap(LHS, RHS);
5278 break;
5279 }
5280
5281 // On a floating point condition, the flags are set as follows:
5282 // ZF PF CF op
5283 // 0 | 0 | 0 | X > Y
5284 // 0 | 0 | 1 | X < Y
5285 // 1 | 0 | 0 | X == Y
5286 // 1 | 1 | 1 | unordered
5287 switch (SetCCOpcode) {
5288 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5288)
;
5289 case ISD::SETUEQ:
5290 case ISD::SETEQ: return X86::COND_E;
5291 case ISD::SETOLT: // flipped
5292 case ISD::SETOGT:
5293 case ISD::SETGT: return X86::COND_A;
5294 case ISD::SETOLE: // flipped
5295 case ISD::SETOGE:
5296 case ISD::SETGE: return X86::COND_AE;
5297 case ISD::SETUGT: // flipped
5298 case ISD::SETULT:
5299 case ISD::SETLT: return X86::COND_B;
5300 case ISD::SETUGE: // flipped
5301 case ISD::SETULE:
5302 case ISD::SETLE: return X86::COND_BE;
5303 case ISD::SETONE:
5304 case ISD::SETNE: return X86::COND_NE;
5305 case ISD::SETUO: return X86::COND_P;
5306 case ISD::SETO: return X86::COND_NP;
5307 case ISD::SETOEQ:
5308 case ISD::SETUNE: return X86::COND_INVALID;
5309 }
5310}
5311
5312/// Is there a floating point cmov for the specific X86 condition code?
5313/// Current x86 isa includes the following FP cmov instructions:
5314/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5315static bool hasFPCMov(unsigned X86CC) {
5316 switch (X86CC) {
5317 default:
5318 return false;
5319 case X86::COND_B:
5320 case X86::COND_BE:
5321 case X86::COND_E:
5322 case X86::COND_P:
5323 case X86::COND_A:
5324 case X86::COND_AE:
5325 case X86::COND_NE:
5326 case X86::COND_NP:
5327 return true;
5328 }
5329}
5330
5331
5332bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5333 const CallInst &I,
5334 MachineFunction &MF,
5335 unsigned Intrinsic) const {
5336 Info.flags = MachineMemOperand::MONone;
5337 Info.offset = 0;
5338
5339 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5340 if (!IntrData) {
5341 switch (Intrinsic) {
5342 case Intrinsic::x86_aesenc128kl:
5343 case Intrinsic::x86_aesdec128kl:
5344 Info.opc = ISD::INTRINSIC_W_CHAIN;
5345 Info.ptrVal = I.getArgOperand(1);
5346 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5347 Info.align = Align(1);
5348 Info.flags |= MachineMemOperand::MOLoad;
5349 return true;
5350 case Intrinsic::x86_aesenc256kl:
5351 case Intrinsic::x86_aesdec256kl:
5352 Info.opc = ISD::INTRINSIC_W_CHAIN;
5353 Info.ptrVal = I.getArgOperand(1);
5354 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5355 Info.align = Align(1);
5356 Info.flags |= MachineMemOperand::MOLoad;
5357 return true;
5358 case Intrinsic::x86_aesencwide128kl:
5359 case Intrinsic::x86_aesdecwide128kl:
5360 Info.opc = ISD::INTRINSIC_W_CHAIN;
5361 Info.ptrVal = I.getArgOperand(0);
5362 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5363 Info.align = Align(1);
5364 Info.flags |= MachineMemOperand::MOLoad;
5365 return true;
5366 case Intrinsic::x86_aesencwide256kl:
5367 case Intrinsic::x86_aesdecwide256kl:
5368 Info.opc = ISD::INTRINSIC_W_CHAIN;
5369 Info.ptrVal = I.getArgOperand(0);
5370 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5371 Info.align = Align(1);
5372 Info.flags |= MachineMemOperand::MOLoad;
5373 return true;
5374 }
5375 return false;
5376 }
5377
5378 switch (IntrData->Type) {
5379 case TRUNCATE_TO_MEM_VI8:
5380 case TRUNCATE_TO_MEM_VI16:
5381 case TRUNCATE_TO_MEM_VI32: {
5382 Info.opc = ISD::INTRINSIC_VOID;
5383 Info.ptrVal = I.getArgOperand(0);
5384 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5385 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5386 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5387 ScalarVT = MVT::i8;
5388 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5389 ScalarVT = MVT::i16;
5390 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5391 ScalarVT = MVT::i32;
5392
5393 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5394 Info.align = Align(1);
5395 Info.flags |= MachineMemOperand::MOStore;
5396 break;
5397 }
5398 case GATHER:
5399 case GATHER_AVX2: {
5400 Info.opc = ISD::INTRINSIC_W_CHAIN;
5401 Info.ptrVal = nullptr;
5402 MVT DataVT = MVT::getVT(I.getType());
5403 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5404 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5405 IndexVT.getVectorNumElements());
5406 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5407 Info.align = Align(1);
5408 Info.flags |= MachineMemOperand::MOLoad;
5409 break;
5410 }
5411 case SCATTER: {
5412 Info.opc = ISD::INTRINSIC_VOID;
5413 Info.ptrVal = nullptr;
5414 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5415 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5416 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5417 IndexVT.getVectorNumElements());
5418 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5419 Info.align = Align(1);
5420 Info.flags |= MachineMemOperand::MOStore;
5421 break;
5422 }
5423 default:
5424 return false;
5425 }
5426
5427 return true;
5428}
5429
5430/// Returns true if the target can instruction select the
5431/// specified FP immediate natively. If false, the legalizer will
5432/// materialize the FP immediate as a load from a constant pool.
5433bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5434 bool ForCodeSize) const {
5435 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
5436 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
5437 return true;
5438 }
5439 return false;
5440}
5441
5442bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5443 ISD::LoadExtType ExtTy,
5444 EVT NewVT) const {
5445 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")(static_cast <bool> (cast<LoadSDNode>(Load)->isSimple
() && "illegal to narrow") ? void (0) : __assert_fail
("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5445, __extension__ __PRETTY_FUNCTION__))
;
5446
5447 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5448 // relocation target a movq or addq instruction: don't let the load shrink.
5449 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5450 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5451 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5452 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5453
5454 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5455 // those uses are extracted directly into a store, then the extract + store
5456 // can be store-folded. Therefore, it's probably not worth splitting the load.
5457 EVT VT = Load->getValueType(0);
5458 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5459 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5460 // Skip uses of the chain value. Result 0 of the node is the load value.
5461 if (UI.getUse().getResNo() != 0)
5462 continue;
5463
5464 // If this use is not an extract + store, it's probably worth splitting.
5465 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5466 UI->use_begin()->getOpcode() != ISD::STORE)
5467 return true;
5468 }
5469 // All non-chain uses are extract + store.
5470 return false;
5471 }
5472
5473 return true;
5474}
5475
5476/// Returns true if it is beneficial to convert a load of a constant
5477/// to just the constant itself.
5478bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5479 Type *Ty) const {
5480 assert(Ty->isIntegerTy())(static_cast <bool> (Ty->isIntegerTy()) ? void (0) :
__assert_fail ("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5480, __extension__ __PRETTY_FUNCTION__))
;
5481
5482 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5483 if (BitSize == 0 || BitSize > 64)
5484 return false;
5485 return true;
5486}
5487
5488bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5489 // If we are using XMM registers in the ABI and the condition of the select is
5490 // a floating-point compare and we have blendv or conditional move, then it is
5491 // cheaper to select instead of doing a cross-register move and creating a
5492 // load that depends on the compare result.
5493 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5494 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5495}
5496
5497bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5498 // TODO: It might be a win to ease or lift this restriction, but the generic
5499 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5500 if (VT.isVector() && Subtarget.hasAVX512())
5501 return false;
5502
5503 return true;
5504}
5505
5506bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5507 SDValue C) const {
5508 // TODO: We handle scalars using custom code, but generic combining could make
5509 // that unnecessary.
5510 APInt MulC;
5511 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5512 return false;
5513
5514 // Find the type this will be legalized too. Otherwise we might prematurely
5515 // convert this to shl+add/sub and then still have to type legalize those ops.
5516 // Another choice would be to defer the decision for illegal types until
5517 // after type legalization. But constant splat vectors of i64 can't make it
5518 // through type legalization on 32-bit targets so we would need to special
5519 // case vXi64.
5520 while (getTypeAction(Context, VT) != TypeLegal)
5521 VT = getTypeToTransformTo(Context, VT);
5522
5523 // If vector multiply is legal, assume that's faster than shl + add/sub.
5524 // TODO: Multiply is a complex op with higher latency and lower throughput in
5525 // most implementations, so this check could be loosened based on type
5526 // and/or a CPU attribute.
5527 if (isOperationLegal(ISD::MUL, VT))
5528 return false;
5529
5530 // shl+add, shl+sub, shl+add+neg
5531 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5532 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5533}
5534
5535bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5536 unsigned Index) const {
5537 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5538 return false;
5539
5540 // Mask vectors support all subregister combinations and operations that
5541 // extract half of vector.
5542 if (ResVT.getVectorElementType() == MVT::i1)
5543 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5544 (Index == ResVT.getVectorNumElements()));
5545
5546 return (Index % ResVT.getVectorNumElements()) == 0;
5547}
5548
5549bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5550 unsigned Opc = VecOp.getOpcode();
5551
5552 // Assume target opcodes can't be scalarized.
5553 // TODO - do we have any exceptions?
5554 if (Opc >= ISD::BUILTIN_OP_END)
5555 return false;
5556
5557 // If the vector op is not supported, try to convert to scalar.
5558 EVT VecVT = VecOp.getValueType();
5559 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5560 return true;
5561
5562 // If the vector op is supported, but the scalar op is not, the transform may
5563 // not be worthwhile.
5564 EVT ScalarVT = VecVT.getScalarType();
5565 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5566}
5567
5568bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5569 bool) const {
5570 // TODO: Allow vectors?
5571 if (VT.isVector())
5572 return false;
5573 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5574}
5575
5576bool X86TargetLowering::isCheapToSpeculateCttz() const {
5577 // Speculate cttz only if we can directly use TZCNT.
5578 return Subtarget.hasBMI();
5579}
5580
5581bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5582 // Speculate ctlz only if we can directly use LZCNT.
5583 return Subtarget.hasLZCNT();
5584}
5585
5586bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5587 const SelectionDAG &DAG,
5588 const MachineMemOperand &MMO) const {
5589 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5590 BitcastVT.getVectorElementType() == MVT::i1)
5591 return false;
5592
5593 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5594 return false;
5595
5596 // If both types are legal vectors, it's always ok to convert them.
5597 if (LoadVT.isVector() && BitcastVT.isVector() &&
5598 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5599 return true;
5600
5601 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5602}
5603
5604bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5605 const MachineFunction &MF) const {
5606 // Do not merge to float value size (128 bytes) if no implicit
5607 // float attribute is set.
5608 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
5609
5610 if (NoFloat) {
5611 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5612 return (MemVT.getSizeInBits() <= MaxIntSize);
5613 }
5614 // Make sure we don't merge greater than our preferred vector
5615 // width.
5616 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5617 return false;
5618
5619 return true;
5620}
5621
5622bool X86TargetLowering::isCtlzFast() const {
5623 return Subtarget.hasFastLZCNT();
5624}
5625
5626bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5627 const Instruction &AndI) const {
5628 return true;
5629}
5630
5631bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5632 EVT VT = Y.getValueType();
5633
5634 if (VT.isVector())
5635 return false;
5636
5637 if (!Subtarget.hasBMI())
5638 return false;
5639
5640 // There are only 32-bit and 64-bit forms for 'andn'.
5641 if (VT != MVT::i32 && VT != MVT::i64)
5642 return false;
5643
5644 return !isa<ConstantSDNode>(Y);
5645}
5646
5647bool X86TargetLowering::hasAndNot(SDValue Y) const {
5648 EVT VT = Y.getValueType();
5649
5650 if (!VT.isVector())
5651 return hasAndNotCompare(Y);
5652
5653 // Vector.
5654
5655 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5656 return false;
5657
5658 if (VT == MVT::v4i32)
5659 return true;
5660
5661 return Subtarget.hasSSE2();
5662}
5663
5664bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5665 return X.getValueType().isScalarInteger(); // 'bt'
5666}
5667
5668bool X86TargetLowering::
5669 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5670 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5671 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5672 SelectionDAG &DAG) const {
5673 // Does baseline recommend not to perform the fold by default?
5674 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5675 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5676 return false;
5677 // For scalars this transform is always beneficial.
5678 if (X.getValueType().isScalarInteger())
5679 return true;
5680 // If all the shift amounts are identical, then transform is beneficial even
5681 // with rudimentary SSE2 shifts.
5682 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5683 return true;
5684 // If we have AVX2 with it's powerful shift operations, then it's also good.
5685 if (Subtarget.hasAVX2())
5686 return true;
5687 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5688 return NewShiftOpcode == ISD::SHL;
5689}
5690
5691bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5692 const SDNode *N, CombineLevel Level) const {
5693 assert(((N->getOpcode() == ISD::SHL &&(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5697, __extension__ __PRETTY_FUNCTION__))
5694 N->getOperand(0).getOpcode() == ISD::SRL) ||(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5697, __extension__ __PRETTY_FUNCTION__))
5695 (N->getOpcode() == ISD::SRL &&(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5697, __extension__ __PRETTY_FUNCTION__))
5696 N->getOperand(0).getOpcode() == ISD::SHL)) &&(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5697, __extension__ __PRETTY_FUNCTION__))
5697 "Expected shift-shift mask")(static_cast <bool> (((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode
() == ISD::SRL && N->getOperand(0).getOpcode() == ISD
::SHL)) && "Expected shift-shift mask") ? void (0) : __assert_fail
("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5697, __extension__ __PRETTY_FUNCTION__))
;
5698 EVT VT = N->getValueType(0);
5699 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5700 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5701 // Only fold if the shift values are equal - so it folds to AND.
5702 // TODO - we should fold if either is a non-uniform vector but we don't do
5703 // the fold for non-splats yet.
5704 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5705 }
5706 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5707}
5708
5709bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5710 EVT VT = Y.getValueType();
5711
5712 // For vectors, we don't have a preference, but we probably want a mask.
5713 if (VT.isVector())
5714 return false;
5715
5716 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5717 if (VT == MVT::i64 && !Subtarget.is64Bit())
5718 return false;
5719
5720 return true;
5721}
5722
5723bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5724 SDNode *N) const {
5725 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5726 !Subtarget.isOSWindows())
5727 return false;
5728 return true;
5729}
5730
5731bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5732 // Any legal vector type can be splatted more efficiently than
5733 // loading/spilling from memory.
5734 return isTypeLegal(VT);
5735}
5736
5737MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5738 MVT VT = MVT::getIntegerVT(NumBits);
5739 if (isTypeLegal(VT))
5740 return VT;
5741
5742 // PMOVMSKB can handle this.
5743 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5744 return MVT::v16i8;
5745
5746 // VPMOVMSKB can handle this.
5747 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5748 return MVT::v32i8;
5749
5750 // TODO: Allow 64-bit type for 32-bit target.
5751 // TODO: 512-bit types should be allowed, but make sure that those
5752 // cases are handled in combineVectorSizedSetCCEquality().
5753
5754 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5755}
5756
5757/// Val is the undef sentinel value or equal to the specified value.
5758static bool isUndefOrEqual(int Val, int CmpVal) {
5759 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5760}
5761
5762/// Return true if every element in Mask is the undef sentinel value or equal to
5763/// the specified value..
5764static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
5765 return llvm::all_of(Mask, [CmpVal](int M) {
5766 return (M == SM_SentinelUndef) || (M == CmpVal);
5767 });
5768}
5769
5770/// Val is either the undef or zero sentinel value.
5771static bool isUndefOrZero(int Val) {
5772 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5773}
5774
5775/// Return true if every element in Mask, beginning from position Pos and ending
5776/// in Pos+Size is the undef sentinel value.
5777static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5778 return llvm::all_of(Mask.slice(Pos, Size),
5779 [](int M) { return M == SM_SentinelUndef; });
5780}
5781
5782/// Return true if the mask creates a vector whose lower half is undefined.
5783static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5784 unsigned NumElts = Mask.size();
5785 return isUndefInRange(Mask, 0, NumElts / 2);
5786}
5787
5788/// Return true if the mask creates a vector whose upper half is undefined.
5789static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5790 unsigned NumElts = Mask.size();
5791 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5792}
5793
/// Return true if Val falls within the half-open range [Low, Hi), i.e.
/// Low <= Val < Hi. An empty range (Low >= Hi) contains no value.
static bool isInRange(int Val, int Low, int Hi) {
  return (Val >= Low && Val < Hi);
}
5798
5799/// Return true if the value of any element in Mask falls within the specified
5800/// range (L, H].
5801static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5802 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5803}
5804
5805/// Return true if the value of any element in Mask is the zero sentinel value.
5806static bool isAnyZero(ArrayRef<int> Mask) {
5807 return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
5808}
5809
5810/// Return true if the value of any element in Mask is the zero or undef
5811/// sentinel values.
5812static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
5813 return llvm::any_of(Mask, [](int M) {
5814 return M == SM_SentinelZero || M == SM_SentinelUndef;
5815 });
5816}
5817
5818/// Return true if Val is undef or if its value falls within the
5819/// specified range (L, H].
5820static bool isUndefOrInRange(int Val, int Low, int Hi) {
5821 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5822}
5823
5824/// Return true if every element in Mask is undef or if its value
5825/// falls within the specified range (L, H].
5826static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5827 return llvm::all_of(
5828 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5829}
5830
5831/// Return true if Val is undef, zero or if its value falls within the
5832/// specified range (L, H].
5833static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5834 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5835}
5836
5837/// Return true if every element in Mask is undef, zero or if its value
5838/// falls within the specified range (L, H].
5839static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5840 return llvm::all_of(
5841 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5842}
5843
5844/// Return true if every element in Mask, beginning
5845/// from position Pos and ending in Pos + Size, falls within the specified
5846/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5847static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5848 unsigned Size, int Low, int Step = 1) {
5849 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5850 if (!isUndefOrEqual(Mask[i], Low))
5851 return false;
5852 return true;
5853}
5854
5855/// Return true if every element in Mask, beginning
5856/// from position Pos and ending in Pos+Size, falls within the specified
5857/// sequential range (Low, Low+Size], or is undef or is zero.
5858static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5859 unsigned Size, int Low,
5860 int Step = 1) {
5861 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5862 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5863 return false;
5864 return true;
5865}
5866
5867/// Return true if every element in Mask, beginning
5868/// from position Pos and ending in Pos+Size is undef or is zero.
5869static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5870 unsigned Size) {
5871 return llvm::all_of(Mask.slice(Pos, Size),
5872 [](int M) { return isUndefOrZero(M); });
5873}
5874
5875/// Helper function to test whether a shuffle mask could be
5876/// simplified by widening the elements being shuffled.
5877///
5878/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5879/// leaves it in an unspecified state.
5880///
5881/// NOTE: This must handle normal vector shuffle masks and *target* vector
5882/// shuffle masks. The latter have the special property of a '-2' representing
5883/// a zero-ed lane of a vector.
5884static bool canWidenShuffleElements(ArrayRef<int> Mask,
5885 SmallVectorImpl<int> &WidenedMask) {
5886 WidenedMask.assign(Mask.size() / 2, 0);
5887 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5888 int M0 = Mask[i];
5889 int M1 = Mask[i + 1];
5890
5891 // If both elements are undef, its trivial.
5892 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5893 WidenedMask[i / 2] = SM_SentinelUndef;
5894 continue;
5895 }
5896
5897 // Check for an undef mask and a mask value properly aligned to fit with
5898 // a pair of values. If we find such a case, use the non-undef mask's value.
5899 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5900 WidenedMask[i / 2] = M1 / 2;
5901