Bug Summary

File: llvm/include/llvm/ADT/APInt.h
Warning: line 403, column 36
The result of the right shift is undefined due to shifting by '64', which is greater or equal to the width of type 'llvm::APInt::WordType'
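The flagged expression boils down to right-shifting a 64-bit word (llvm::APInt::WordType is uint64_t) by an amount equal to its width. The sketch below is not the APInt code itself; the helper names are hypothetical and only illustrate the undefined pattern and a guarded alternative, under the assumption that a full-width shift is meant to produce zero.

#include <cassert>
#include <cstdint>
#include <iostream>

// Minimal, self-contained illustration of the diagnostic (not APInt code).
// C++ leaves a shift by >= the bit width of the type undefined; on x86 the
// hardware masks the shift count, so `W >> 64` commonly yields W rather than 0.
using WordType = uint64_t;          // same width as llvm::APInt::WordType
constexpr unsigned BitsPerWord = 64;

// Undefined behavior when ShiftAmt >= BitsPerWord -- the pattern reported
// at APInt.h:403, column 36.
WordType unsafeLShr(WordType W, unsigned ShiftAmt) {
  return W >> ShiftAmt;
}

// Guarded version: a shift by the full width (or more) is defined here to
// produce zero, matching the mathematical expectation.
WordType safeLShr(WordType W, unsigned ShiftAmt) {
  return ShiftAmt >= BitsPerWord ? WordType(0) : W >> ShiftAmt;
}

int main() {
  WordType W = 0xDEADBEEFDEADBEEFULL;
  assert(safeLShr(W, 64) == 0);          // well defined
  assert(safeLShr(W, 4) == (W >> 4));    // normal case unchanged
  std::cout << std::hex << safeLShr(W, 60) << '\n';  // prints d
  // unsafeLShr(W, 64) would reproduce the undefined shift the analyzer flags.
  return 0;
}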

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/X86 -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-08-28-193554-24367-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelLowering.cpp

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/IntrinsicLowering.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineJumpTableInfo.h"
39#include "llvm/CodeGen/MachineLoopInfo.h"
40#include "llvm/CodeGen/MachineModuleInfo.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/TargetLowering.h"
43#include "llvm/CodeGen/WinEHFuncInfo.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DerivedTypes.h"
47#include "llvm/IR/DiagnosticInfo.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalVariable.h"
51#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/Instructions.h"
53#include "llvm/IR/Intrinsics.h"
54#include "llvm/IR/PatternMatch.h"
55#include "llvm/MC/MCAsmInfo.h"
56#include "llvm/MC/MCContext.h"
57#include "llvm/MC/MCExpr.h"
58#include "llvm/MC/MCSymbol.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/KnownBits.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Target/TargetOptions.h"
65#include <algorithm>
66#include <bitset>
67#include <cctype>
68#include <numeric>
69using namespace llvm;
70
71#define DEBUG_TYPE "x86-isel"
72
73STATISTIC(NumTailCalls, "Number of tail calls");
74
75static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
76 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
77 cl::desc(
78 "Sets the preferable loop alignment for experiments (as log2 bytes) "
79 "for innermost loops only. If specified, this option overrides "
80 "alignment set by x86-experimental-pref-loop-alignment."),
81 cl::Hidden);
82
83static cl::opt<bool> MulConstantOptimization(
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden);
88
89static cl::opt<bool> ExperimentalUnorderedISEL(
90 "x86-experimental-unordered-atomic-isel", cl::init(false),
91 cl::desc("Use LoadSDNode and StoreSDNode instead of "
92 "AtomicSDNode for unordered atomic loads and "
93 "stores respectively."),
94 cl::Hidden);
95
96/// Call this when the user attempts to do something unsupported, like
97/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
98/// report_fatal_error, so calling code should attempt to recover without
99/// crashing.
100static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
101 const char *Msg) {
102 MachineFunction &MF = DAG.getMachineFunction();
103 DAG.getContext()->diagnose(
104 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
105}
106
107X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
108 const X86Subtarget &STI)
109 : TargetLowering(TM), Subtarget(STI) {
110 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
111 X86ScalarSSEf64 = Subtarget.hasSSE2();
112 X86ScalarSSEf32 = Subtarget.hasSSE1();
113 X86ScalarSSEf16 = Subtarget.hasFP16();
114 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
115
116 // Set up the TargetLowering object.
117
118 // X86 is weird. It always uses i8 for shift amounts and setcc results.
119 setBooleanContents(ZeroOrOneBooleanContent);
120 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
121 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
122
123 // For 64-bit, since we have so many registers, use the ILP scheduler.
124 // For 32-bit, use the register pressure specific scheduling.
125 // For Atom, always use ILP scheduling.
126 if (Subtarget.isAtom())
127 setSchedulingPreference(Sched::ILP);
128 else if (Subtarget.is64Bit())
129 setSchedulingPreference(Sched::ILP);
130 else
131 setSchedulingPreference(Sched::RegPressure);
132 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
133 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
134
135 // Bypass expensive divides and use cheaper ones.
136 if (TM.getOptLevel() >= CodeGenOpt::Default) {
137 if (Subtarget.hasSlowDivide32())
138 addBypassSlowDiv(32, 8);
139 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
140 addBypassSlowDiv(64, 32);
141 }
142
143 // Setup Windows compiler runtime calls.
144 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
145 static const struct {
146 const RTLIB::Libcall Op;
147 const char * const Name;
148 const CallingConv::ID CC;
149 } LibraryCalls[] = {
150 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
151 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
152 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
153 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
154 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
155 };
156
157 for (const auto &LC : LibraryCalls) {
158 setLibcallName(LC.Op, LC.Name);
159 setLibcallCallingConv(LC.Op, LC.CC);
160 }
161 }
162
163 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
164 // MSVCRT doesn't have powi; fall back to pow
165 setLibcallName(RTLIB::POWI_F32, nullptr);
166 setLibcallName(RTLIB::POWI_F64, nullptr);
167 }
168
169 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
170 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
171 // FIXME: Should we be limiting the atomic size on other configs? Default is
172 // 1024.
173 if (!Subtarget.hasCmpxchg8b())
174 setMaxAtomicSizeInBitsSupported(32);
175
176 // Set up the register classes.
177 addRegisterClass(MVT::i8, &X86::GR8RegClass);
178 addRegisterClass(MVT::i16, &X86::GR16RegClass);
179 addRegisterClass(MVT::i32, &X86::GR32RegClass);
180 if (Subtarget.is64Bit())
181 addRegisterClass(MVT::i64, &X86::GR64RegClass);
182
183 for (MVT VT : MVT::integer_valuetypes())
184 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
185
186 // We don't accept any truncstore of integer registers.
187 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
188 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
189 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
190 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
191 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
192 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
193
194 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
195
196 // SETOEQ and SETUNE require checking two conditions.
197 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
198 setCondCodeAction(ISD::SETOEQ, VT, Expand);
199 setCondCodeAction(ISD::SETUNE, VT, Expand);
200 }
201
202 // Integer absolute.
203 if (Subtarget.hasCMov()) {
204 setOperationAction(ISD::ABS , MVT::i16 , Custom);
205 setOperationAction(ISD::ABS , MVT::i32 , Custom);
206 if (Subtarget.is64Bit())
207 setOperationAction(ISD::ABS , MVT::i64 , Custom);
208 }
209
210 // Signed saturation subtraction.
211 setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
212 setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
213 setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
214 if (Subtarget.is64Bit())
215 setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
216
217 // Funnel shifts.
218 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
219 // For slow shld targets we only lower for code size.
220 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
221
222 setOperationAction(ShiftOp , MVT::i8 , Custom);
223 setOperationAction(ShiftOp , MVT::i16 , Custom);
224 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
225 if (Subtarget.is64Bit())
226 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
227 }
228
229 if (!Subtarget.useSoftFloat()) {
230 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
231 // operation.
232 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
233 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
234 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
235 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
236 // We have an algorithm for SSE2, and we turn this into a 64-bit
237 // FILD or VCVTUSI2SS/SD for other targets.
238 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
239 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
240 // We have an algorithm for SSE2->double, and we turn this into a
241 // 64-bit FILD followed by conditional FADD for other targets.
242 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
243 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
244
245 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
246 // this operation.
247 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
248 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
249 // SSE has no i16 to fp conversion, only i32. We promote in the handler
250 // to allow f80 to use i16 and f64 to use i16 with sse1 only
251 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
252 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
253 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
254 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
255 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
256 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
257 // are Legal, f80 is custom lowered.
258 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
259 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
260
261 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
262 // this operation.
263 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
264 // FIXME: This doesn't generate invalid exception when it should. PR44019.
265 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
266 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
267 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
268 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
269 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
270 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
271 // are Legal, f80 is custom lowered.
272 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
273 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
274
275 // Handle FP_TO_UINT by promoting the destination to a larger signed
276 // conversion.
277 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
278 // FIXME: This doesn't generate invalid exception when it should. PR44019.
279 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
280 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
281 // FIXME: This doesn't generate invalid exception when it should. PR44019.
282 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
283 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
284 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
285 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
286 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
287
288 setOperationAction(ISD::LRINT, MVT::f32, Custom);
289 setOperationAction(ISD::LRINT, MVT::f64, Custom);
290 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
291 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
292
293 if (!Subtarget.is64Bit()) {
294 setOperationAction(ISD::LRINT, MVT::i64, Custom);
295 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
296 }
297 }
298
299 if (Subtarget.hasSSE2()) {
300 // Custom lowering for saturating float to int conversions.
301 // We handle promotion to larger result types manually.
302 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
303 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
304 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
305 }
306 if (Subtarget.is64Bit()) {
307 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
308 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
309 }
310 }
311
312 // Handle address space casts between mixed sized pointers.
313 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
314 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
315
316 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
317 if (!X86ScalarSSEf64) {
318 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
319 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
320 if (Subtarget.is64Bit()) {
321 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
322 // Without SSE, i64->f64 goes through memory.
323 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
324 }
325 } else if (!Subtarget.is64Bit())
326 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
327
328 // Scalar integer divide and remainder are lowered to use operations that
329 // produce two results, to match the available instructions. This exposes
330 // the two-result form to trivial CSE, which is able to combine x/y and x%y
331 // into a single instruction.
332 //
333 // Scalar integer multiply-high is also lowered to use two-result
334 // operations, to match the available instructions. However, plain multiply
335 // (low) operations are left as Legal, as there are single-result
336 // instructions for this in x86. Using the two-result multiply instructions
337 // when both high and low results are needed must be arranged by dagcombine.
338 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
339 setOperationAction(ISD::MULHS, VT, Expand);
340 setOperationAction(ISD::MULHU, VT, Expand);
341 setOperationAction(ISD::SDIV, VT, Expand);
342 setOperationAction(ISD::UDIV, VT, Expand);
343 setOperationAction(ISD::SREM, VT, Expand);
344 setOperationAction(ISD::UREM, VT, Expand);
345 }
346
347 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
348 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
349 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
350 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
351 setOperationAction(ISD::BR_CC, VT, Expand);
352 setOperationAction(ISD::SELECT_CC, VT, Expand);
353 }
354 if (Subtarget.is64Bit())
355 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
356 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
357 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
358 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
359
360 setOperationAction(ISD::FREM , MVT::f32 , Expand);
361 setOperationAction(ISD::FREM , MVT::f64 , Expand);
362 setOperationAction(ISD::FREM , MVT::f80 , Expand);
363 setOperationAction(ISD::FREM , MVT::f128 , Expand);
364
365 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
366 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
367 setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
368 }
369
370 // Promote the i8 variants and force them on up to i32 which has a shorter
371 // encoding.
372 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
373 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
374
375 if (Subtarget.hasBMI()) {
376 // Promote the i16 zero undef variant and force it on up to i32 when tzcnt
377 // is enabled.
378 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32);
379 } else {
380 setOperationAction(ISD::CTTZ, MVT::i16, Custom);
381 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
382 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
383 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
384 if (Subtarget.is64Bit()) {
385 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
386 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
387 }
388 }
389
390 if (Subtarget.hasLZCNT()) {
391 // When promoting the i8 variants, force them to i32 for a shorter
392 // encoding.
393 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
394 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
395 } else {
396 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
397 if (VT == MVT::i64 && !Subtarget.is64Bit())
398 continue;
399 setOperationAction(ISD::CTLZ , VT, Custom);
400 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
401 }
402 }
403
404 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
405 ISD::STRICT_FP_TO_FP16}) {
406 // Special handling for half-precision floating point conversions.
407 // If we don't have F16C support, then lower half float conversions
408 // into library calls.
409 setOperationAction(
410 Op, MVT::f32,
411 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
412 // There's never any support for operations beyond MVT::f32.
413 setOperationAction(Op, MVT::f64, Expand);
414 setOperationAction(Op, MVT::f80, Expand);
415 setOperationAction(Op, MVT::f128, Expand);
416 }
417
418 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
419 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
420 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
421 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
422 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
423 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
424 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
425 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
426
427 setOperationAction(ISD::PARITY, MVT::i8, Custom);
428 if (Subtarget.hasPOPCNT()) {
429 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
430 } else {
431 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
432 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
433 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
434 if (Subtarget.is64Bit())
435 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
436 else
437 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
438
439 setOperationAction(ISD::PARITY, MVT::i16, Custom);
440 setOperationAction(ISD::PARITY, MVT::i32, Custom);
441 if (Subtarget.is64Bit())
442 setOperationAction(ISD::PARITY, MVT::i64, Custom);
443 }
444
445 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
446
447 if (!Subtarget.hasMOVBE())
448 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
449
450 // X86 wants to expand cmov itself.
451 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
452 setOperationAction(ISD::SELECT, VT, Custom);
453 setOperationAction(ISD::SETCC, VT, Custom);
454 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
455 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
456 }
457 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
458 if (VT == MVT::i64 && !Subtarget.is64Bit())
459 continue;
460 setOperationAction(ISD::SELECT, VT, Custom);
461 setOperationAction(ISD::SETCC, VT, Custom);
462 }
463
464 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
465 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
466 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
467
468 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
469 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
470 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
471 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
472 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
473 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
474 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
475 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
476
477 // Darwin ABI issue.
478 for (auto VT : { MVT::i32, MVT::i64 }) {
479 if (VT == MVT::i64 && !Subtarget.is64Bit())
480 continue;
481 setOperationAction(ISD::ConstantPool , VT, Custom);
482 setOperationAction(ISD::JumpTable , VT, Custom);
483 setOperationAction(ISD::GlobalAddress , VT, Custom);
484 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
485 setOperationAction(ISD::ExternalSymbol , VT, Custom);
486 setOperationAction(ISD::BlockAddress , VT, Custom);
487 }
488
489 // 64-bit shl, sra, srl (iff 32-bit x86)
490 for (auto VT : { MVT::i32, MVT::i64 }) {
491 if (VT == MVT::i64 && !Subtarget.is64Bit())
492 continue;
493 setOperationAction(ISD::SHL_PARTS, VT, Custom);
494 setOperationAction(ISD::SRA_PARTS, VT, Custom);
495 setOperationAction(ISD::SRL_PARTS, VT, Custom);
496 }
497
498 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
499 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
500
501 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
502
503 // Expand certain atomics
504 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
505 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
506 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
507 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
508 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
509 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
510 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
511 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
512 }
513
514 if (!Subtarget.is64Bit())
515 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
516
517 if (Subtarget.hasCmpxchg16b()) {
518 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
519 }
520
521 // FIXME - use subtarget debug flags
522 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
523 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
524 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
525 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
526 }
527
528 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
529 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
530
531 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
532 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
533
534 setOperationAction(ISD::TRAP, MVT::Other, Legal);
535 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
536 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
537
538 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
539 setOperationAction(ISD::VASTART , MVT::Other, Custom);
540 setOperationAction(ISD::VAEND , MVT::Other, Expand);
541 bool Is64Bit = Subtarget.is64Bit();
542 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
543 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
544
545 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
546 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
547
548 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
549
550 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
551 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
552 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
553
554 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
555 // f32 and f64 use SSE.
556 // Set up the FP register classes.
557 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
558 : &X86::FR32RegClass);
559 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
560 : &X86::FR64RegClass);
561
562 // Disable f32->f64 extload as we can only generate this in one instruction
563 // under optsize. So it's easier to pattern match (fpext (load)) for that
564 // case instead of needing to emit 2 instructions for extload in the
565 // non-optsize case.
566 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
567
568 for (auto VT : { MVT::f32, MVT::f64 }) {
569 // Use ANDPD to simulate FABS.
570 setOperationAction(ISD::FABS, VT, Custom);
571
572 // Use XORP to simulate FNEG.
573 setOperationAction(ISD::FNEG, VT, Custom);
574
575 // Use ANDPD and ORPD to simulate FCOPYSIGN.
576 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
577
578 // These might be better off as horizontal vector ops.
579 setOperationAction(ISD::FADD, VT, Custom);
580 setOperationAction(ISD::FSUB, VT, Custom);
581
582 // We don't support sin/cos/fmod
583 setOperationAction(ISD::FSIN , VT, Expand);
584 setOperationAction(ISD::FCOS , VT, Expand);
585 setOperationAction(ISD::FSINCOS, VT, Expand);
586 }
587
588 // Lower this to MOVMSK plus an AND.
589 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
590 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
591
592 } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
593 (UseX87 || Is64Bit)) {
594 // Use SSE for f32, x87 for f64.
595 // Set up the FP register classes.
596 addRegisterClass(MVT::f32, &X86::FR32RegClass);
597 if (UseX87)
598 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
599
600 // Use ANDPS to simulate FABS.
601 setOperationAction(ISD::FABS , MVT::f32, Custom);
602
603 // Use XORP to simulate FNEG.
604 setOperationAction(ISD::FNEG , MVT::f32, Custom);
605
606 if (UseX87)
607 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
608
609 // Use ANDPS and ORPS to simulate FCOPYSIGN.
610 if (UseX87)
611 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
612 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
613
614 // We don't support sin/cos/fmod
615 setOperationAction(ISD::FSIN , MVT::f32, Expand);
616 setOperationAction(ISD::FCOS , MVT::f32, Expand);
617 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
618
619 if (UseX87) {
620 // Always expand sin/cos functions even though x87 has an instruction.
621 setOperationAction(ISD::FSIN, MVT::f64, Expand);
622 setOperationAction(ISD::FCOS, MVT::f64, Expand);
623 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
624 }
625 } else if (UseX87) {
626 // f32 and f64 in x87.
627 // Set up the FP register classes.
628 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
629 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
630
631 for (auto VT : { MVT::f32, MVT::f64 }) {
632 setOperationAction(ISD::UNDEF, VT, Expand);
633 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
634
635 // Always expand sin/cos functions even though x87 has an instruction.
636 setOperationAction(ISD::FSIN , VT, Expand);
637 setOperationAction(ISD::FCOS , VT, Expand);
638 setOperationAction(ISD::FSINCOS, VT, Expand);
639 }
640 }
641
642 // Expand FP32 immediates into loads from the stack, save special cases.
643 if (isTypeLegal(MVT::f32)) {
644 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
645 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
646 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
647 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
648 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
649 } else // SSE immediates.
650 addLegalFPImmediate(APFloat(+0.0f)); // xorps
651 }
652 // Expand FP64 immediates into loads from the stack, save special cases.
653 if (isTypeLegal(MVT::f64)) {
654 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
655 addLegalFPImmediate(APFloat(+0.0)); // FLD0
656 addLegalFPImmediate(APFloat(+1.0)); // FLD1
657 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
658 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
659 } else // SSE immediates.
660 addLegalFPImmediate(APFloat(+0.0)); // xorpd
661 }
662 // Handle constrained floating-point operations of scalar.
663 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
664 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
665 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
666 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
667 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
668 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
669 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
670 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
671 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
672 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
673 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
674 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
675 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
676
677 // We don't support FMA.
678 setOperationAction(ISD::FMA, MVT::f64, Expand);
679 setOperationAction(ISD::FMA, MVT::f32, Expand);
680
681 // f80 always uses X87.
682 if (UseX87) {
683 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
684 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
685 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
686 {
687 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
688 addLegalFPImmediate(TmpFlt); // FLD0
689 TmpFlt.changeSign();
690 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
691
692 bool ignored;
693 APFloat TmpFlt2(+1.0);
694 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
695 &ignored);
696 addLegalFPImmediate(TmpFlt2); // FLD1
697 TmpFlt2.changeSign();
698 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
699 }
700
701 // Always expand sin/cos functions even though x87 has an instruction.
702 setOperationAction(ISD::FSIN , MVT::f80, Expand);
703 setOperationAction(ISD::FCOS , MVT::f80, Expand);
704 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
705
706 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
707 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
708 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
709 setOperationAction(ISD::FRINT, MVT::f80, Expand);
710 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
711 setOperationAction(ISD::FMA, MVT::f80, Expand);
712 setOperationAction(ISD::LROUND, MVT::f80, Expand);
713 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
714 setOperationAction(ISD::LRINT, MVT::f80, Custom);
715 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
716 setOperationAction(ISD::ISNAN, MVT::f80, Custom);
717
718 // Handle constrained floating-point operations of scalar.
719 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
720 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
721 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
722 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
723 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
724 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
725 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
726 // as Custom.
727 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
728 }
729
730 // f128 uses xmm registers, but most operations require libcalls.
731 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
732 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
733 : &X86::VR128RegClass);
734
735 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
736
737 setOperationAction(ISD::FADD, MVT::f128, LibCall);
738 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
739 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
740 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
741 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
742 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
743 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
744 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
745 setOperationAction(ISD::FMA, MVT::f128, LibCall);
746 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
747
748 setOperationAction(ISD::FABS, MVT::f128, Custom);
749 setOperationAction(ISD::FNEG, MVT::f128, Custom);
750 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
751
752 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
753 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
754 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
755 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
756 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
757 // No STRICT_FSINCOS
758 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
759 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
760
761 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
762 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
763 // We need to custom handle any FP_ROUND with an f128 input, but
764 // LegalizeDAG uses the result type to know when to run a custom handler.
765 // So we have to list all legal floating point result types here.
766 if (isTypeLegal(MVT::f32)) {
767 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
768 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
769 }
770 if (isTypeLegal(MVT::f64)) {
771 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
772 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
773 }
774 if (isTypeLegal(MVT::f80)) {
775 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
776 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
777 }
778
779 setOperationAction(ISD::SETCC, MVT::f128, Custom);
780
781 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
782 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
783 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
784 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
785 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
786 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
787 }
788
789 // Always use a library call for pow.
790 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
791 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
792 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
793 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
794
795 setOperationAction(ISD::FLOG, MVT::f80, Expand);
796 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
797 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
798 setOperationAction(ISD::FEXP, MVT::f80, Expand);
799 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
800 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
801 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
802
803 // Some FP actions are always expanded for vector types.
804 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
805 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
806 setOperationAction(ISD::FSIN, VT, Expand);
807 setOperationAction(ISD::FSINCOS, VT, Expand);
808 setOperationAction(ISD::FCOS, VT, Expand);
809 setOperationAction(ISD::FREM, VT, Expand);
810 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
811 setOperationAction(ISD::FPOW, VT, Expand);
812 setOperationAction(ISD::FLOG, VT, Expand);
813 setOperationAction(ISD::FLOG2, VT, Expand);
814 setOperationAction(ISD::FLOG10, VT, Expand);
815 setOperationAction(ISD::FEXP, VT, Expand);
816 setOperationAction(ISD::FEXP2, VT, Expand);
817 }
818
819 // First set operation action for all vector types to either promote
820 // (for widening) or expand (for scalarization). Then we will selectively
821 // turn on ones that can be effectively codegen'd.
822 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
823 setOperationAction(ISD::SDIV, VT, Expand);
824 setOperationAction(ISD::UDIV, VT, Expand);
825 setOperationAction(ISD::SREM, VT, Expand);
826 setOperationAction(ISD::UREM, VT, Expand);
827 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
828 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
829 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
830 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
831 setOperationAction(ISD::FMA, VT, Expand);
832 setOperationAction(ISD::FFLOOR, VT, Expand);
833 setOperationAction(ISD::FCEIL, VT, Expand);
834 setOperationAction(ISD::FTRUNC, VT, Expand);
835 setOperationAction(ISD::FRINT, VT, Expand);
836 setOperationAction(ISD::FNEARBYINT, VT, Expand);
837 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
838 setOperationAction(ISD::MULHS, VT, Expand);
839 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
840 setOperationAction(ISD::MULHU, VT, Expand);
841 setOperationAction(ISD::SDIVREM, VT, Expand);
842 setOperationAction(ISD::UDIVREM, VT, Expand);
843 setOperationAction(ISD::CTPOP, VT, Expand);
844 setOperationAction(ISD::CTTZ, VT, Expand);
845 setOperationAction(ISD::CTLZ, VT, Expand);
846 setOperationAction(ISD::ROTL, VT, Expand);
847 setOperationAction(ISD::ROTR, VT, Expand);
848 setOperationAction(ISD::BSWAP, VT, Expand);
849 setOperationAction(ISD::SETCC, VT, Expand);
850 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
851 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
852 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
853 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
854 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
855 setOperationAction(ISD::TRUNCATE, VT, Expand);
856 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
857 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
858 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
859 setOperationAction(ISD::SELECT_CC, VT, Expand);
860 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
861 setTruncStoreAction(InnerVT, VT, Expand);
862
863 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
864 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
865
866 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
867 // types, we have to deal with them whether we ask for Expansion or not.
868 // Setting Expand causes its own optimisation problems though, so leave
869 // them legal.
870 if (VT.getVectorElementType() == MVT::i1)
871 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
872
873 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
874 // split/scalarized right now.
875 if (VT.getVectorElementType() == MVT::f16)
876 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
877 }
878 }
879
880 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
881 // with -msoft-float, disable use of MMX as well.
882 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
883 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
884 // No operations on x86mmx supported, everything uses intrinsics.
885 }
886
887 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
888 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
889 : &X86::VR128RegClass);
890
891 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
892 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
893 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
894 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
895 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
896 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
897 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
898 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
899
900 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
901 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
902
903 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
904 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
905 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
906 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
907 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
908 }
909
910 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
911 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
912 : &X86::VR128RegClass);
913
914 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
915 // registers cannot be used even for integer operations.
916 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
917 : &X86::VR128RegClass);
918 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
919 : &X86::VR128RegClass);
920 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
921 : &X86::VR128RegClass);
922 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
923 : &X86::VR128RegClass);
924
925 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
926 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
927 setOperationAction(ISD::SDIV, VT, Custom);
928 setOperationAction(ISD::SREM, VT, Custom);
929 setOperationAction(ISD::UDIV, VT, Custom);
930 setOperationAction(ISD::UREM, VT, Custom);
931 }
932
933 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
934 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
935 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
936
937 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
938 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
939 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
940 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
941 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
942 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
943 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
944 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
945 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
946 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
947
948 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
949 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
950
951 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
952 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
953 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
954
955 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
956 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
957 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
958 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
959 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
960 }
961
962 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
963 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
964 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
965 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
966 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
967 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
968 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
969 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
970 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
971 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
972
973 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
974 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
975 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
976
977 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
978 setOperationAction(ISD::SETCC, VT, Custom);
979 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
980 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
981 setOperationAction(ISD::CTPOP, VT, Custom);
982 setOperationAction(ISD::ABS, VT, Custom);
983
984 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
985 // setcc all the way to isel and prefer SETGT in some isel patterns.
986 setCondCodeAction(ISD::SETLT, VT, Custom);
987 setCondCodeAction(ISD::SETLE, VT, Custom);
988 }
989
990 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
991 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
992 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
993 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
994 setOperationAction(ISD::VSELECT, VT, Custom);
995 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
996 }
997
998 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
999 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1000 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1001 setOperationAction(ISD::VSELECT, VT, Custom);
1002
1003 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1004 continue;
1005
1006 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1007 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1008 }
1009
1010 // Custom lower v2i64 and v2f64 selects.
1011 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1012 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1013 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
1014 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
1015 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
1016
1017 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1018 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
1019 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1020 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1021 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
1022 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1023
1024 // Custom legalize these to avoid over promotion or custom promotion.
1025 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1026 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1027 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1028 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1029 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1030 }
1031
1032 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1033 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
1034 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1035 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1036
1037 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1038 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1039
1040 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1041 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1042
1043 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1044 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1045 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1046 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1047 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1048
1049 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1050 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1051 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1052 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1053
1054 // We want to legalize this to an f64 load rather than an i64 load on
1055 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1056 // store.
1057 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1058 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1059 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1060 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1061 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1062 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1063
1064 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1065 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1066 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1067 if (!Subtarget.hasAVX512())
1068 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1069
1070 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1071 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1072 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1073
1074 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1075
1076 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1077 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1078 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1079 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1080 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1081 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1082
1083 // In the customized shift lowering, the legal v4i32/v2i64 cases
1084 // in AVX2 will be recognized.
1085 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1086 setOperationAction(ISD::SRL, VT, Custom);
1087 setOperationAction(ISD::SHL, VT, Custom);
1088 setOperationAction(ISD::SRA, VT, Custom);
1089 }
1090
1091 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1092 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1093
1094 // With 512-bit registers or AVX512VL+BW, expanding (and promoting the
1095 // shifts) is better.
1096 if (!Subtarget.useAVX512Regs() &&
1097 !(Subtarget.hasBWI() && Subtarget.hasVLX()))
1098 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1099
1100 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1101 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1102 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1103 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1104 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1105 }
1106
1107 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1108 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1109 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1110 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1111 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1112 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1113 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1114 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1115 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1116
1117 // These might be better off as horizontal vector ops.
1118 setOperationAction(ISD::ADD, MVT::i16, Custom);
1119 setOperationAction(ISD::ADD, MVT::i32, Custom);
1120 setOperationAction(ISD::SUB, MVT::i16, Custom);
1121 setOperationAction(ISD::SUB, MVT::i32, Custom);
1122 }
1123
1124 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1125 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1126 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1127 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1128 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1129 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1130 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1131 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1132 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1133 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1134 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1135 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1136 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1137 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1138
1139 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1140 }
1141
1142 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1143 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1144 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1145 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1146 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1147 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1148 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1149 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1150
1151 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1152 setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
1153 setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
1154
1155 // FIXME: Do we need to handle scalar-to-vector here?
1156 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1157
1158 // We directly match byte blends in the backend as they match the VSELECT
1159 // condition form.
1160 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1161
1162 // SSE41 brings specific instructions for doing vector sign extend even in
1163 // cases where we don't have SRA.
1164 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1165 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1166 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1167 }
1168
1169 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1170 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1171 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1172 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1173 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1174 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1175 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1176 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1177 }
1178
1179 // i8 vectors are custom because the source register and source
1180 // memory operand types are not the same width.
1181 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1182
1183 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1184 // We need to scalarize v4i64->v4i32 uint_to_fp using cvtsi2ss, but we can
1185 // do the pre and post work in the vector domain.
1186 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1187 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1188 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1189 // so that DAG combine doesn't try to turn it into uint_to_fp.
1190 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1191 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1192 }
1193 }
1194
1195 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1196 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1197 }
1198
1199 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1200 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1201 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1202 setOperationAction(ISD::ROTL, VT, Custom);
1203
1204 // XOP can efficiently perform BITREVERSE with VPPERM.
1205 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1206 setOperationAction(ISD::BITREVERSE, VT, Custom);
1207
1208 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1209 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1210 setOperationAction(ISD::BITREVERSE, VT, Custom);
1211 }
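// Editor's note: illustrative sketch, not part of X86ISelLowering.cpp. The
// BITREVERSE entries above back clang's bit-reverse builtin; under XOP the
// byte-level shuffle part of the expansion can be done with a single VPPERM.
#include <cstdint>
uint32_t reverseBits(uint32_t X) {
  return __builtin_bitreverse32(X);   // lowers to ISD::BITREVERSE (Custom above)
}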
1212
1213 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1214 bool HasInt256 = Subtarget.hasInt256();
1215
1216 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1217 : &X86::VR256RegClass);
1218 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1219 : &X86::VR256RegClass);
1220 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1221 : &X86::VR256RegClass);
1222 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1223 : &X86::VR256RegClass);
1224 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1225 : &X86::VR256RegClass);
1226 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1227 : &X86::VR256RegClass);
1228
1229 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1230 setOperationAction(ISD::FFLOOR, VT, Legal);
1231 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1232 setOperationAction(ISD::FCEIL, VT, Legal);
1233 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1234 setOperationAction(ISD::FTRUNC, VT, Legal);
1235 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1236 setOperationAction(ISD::FRINT, VT, Legal);
1237 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1238 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1239 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1240 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1241 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1242
1243 setOperationAction(ISD::FROUND, VT, Custom);
1244
1245 setOperationAction(ISD::FNEG, VT, Custom);
1246 setOperationAction(ISD::FABS, VT, Custom);
1247 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1248 }
1249
1250 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1251 // even though v8i16 is a legal type.
1252 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1253 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1254 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1255 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1256 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1257 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
1258 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
1259
1260 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1261 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1262
1263 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1264 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1265 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1266 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1267 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1268 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1269 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1270 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1271 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1272 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1273 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1274 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1275
1276 if (!Subtarget.hasAVX512())
1277 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1278
1279 // In the customized shift lowering, the legal v8i32/v4i64 cases
1280 // in AVX2 will be recognized.
1281 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1282 setOperationAction(ISD::SRL, VT, Custom);
1283 setOperationAction(ISD::SHL, VT, Custom);
1284 setOperationAction(ISD::SRA, VT, Custom);
1285 }
1286
1287 // These types need custom splitting if their input is a 128-bit vector.
1288 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1289 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1290 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1291 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1292
1293 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1294 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1295
1296 // With BWI, expanding (and promoting the shifts) is the better option.
1297 if (!Subtarget.useBWIRegs())
1298 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1299
1300 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1301 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1302 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1303 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1304 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1305 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1306
1307 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1308 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1309 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1310 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1311 }
1312
1313 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1314 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1315 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1316 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1317
1318 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1319 setOperationAction(ISD::SETCC, VT, Custom);
1320 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1321 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1322 setOperationAction(ISD::CTPOP, VT, Custom);
1323 setOperationAction(ISD::CTLZ, VT, Custom);
1324
1325 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1326 // setcc all the way to isel and prefer SETGT in some isel patterns.
1327 setCondCodeAction(ISD::SETLT, VT, Custom);
1328 setCondCodeAction(ISD::SETLE, VT, Custom);
1329 }
1330
1331 if (Subtarget.hasAnyFMA()) {
1332 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1333 MVT::v2f64, MVT::v4f64 }) {
1334 setOperationAction(ISD::FMA, VT, Legal);
1335 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1336 }
1337 }
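// Editor's note: illustrative sketch, not part of X86ISelLowering.cpp. With
// FMA marked Legal above, an explicit fused multiply-add like this can select
// a single vfmadd instruction rather than a separate multiply and add.
#include <cmath>
double fusedMulAdd(double A, double B, double C) {
  return std::fma(A, B, C);   // ISD::FMA / ISD::STRICT_FMA
}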
1338
1339 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1340 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1341 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1342 }
1343
1344 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1345 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1346 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1347 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1348
1349 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1350 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1351 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1352 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1353 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1354 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1355
1356 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1357 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1358
1359 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1360 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1361 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1362 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1363 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1364
1365 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1366 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1367 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1368 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1369 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1370 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1371 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1372 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1373 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1374 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1375 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1376 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1377
1378 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1379 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1380 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1381 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1382 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1383 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1384 }
1385
1386 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1387 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1388 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1389 }
1390
1391 if (HasInt256) {
1392 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1393 // when we have a 256-bit-wide blend with immediate.
1394 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1395 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1396
1397 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1398 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1399 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1400 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1401 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1402 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1403 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1404 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1405 }
1406 }
1407
1408 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1409 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1410 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1411 setOperationAction(ISD::MSTORE, VT, Legal);
1412 }
1413
1414 // Extract subvector is special because the value type
1415 // (result) is 128-bit but the source is 256-bit wide.
1416 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1417 MVT::v4f32, MVT::v2f64 }) {
1418 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1419 }
1420
1421 // Custom lower several nodes for 256-bit types.
1422 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1423 MVT::v8f32, MVT::v4f64 }) {
1424 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1425 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1426 setOperationAction(ISD::VSELECT, VT, Custom);
1427 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1428 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1429 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1430 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1431 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1432 setOperationAction(ISD::STORE, VT, Custom);
1433 }
1434
1435 if (HasInt256) {
1436 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1437
1438 // Custom legalize 2x32 to get a little better code.
1439 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1440 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1441
1442 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1443 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1444 setOperationAction(ISD::MGATHER, VT, Custom);
1445 }
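// Editor's note: illustrative sketch, not part of X86ISelLowering.cpp. The
// MGATHER entries above cover indexed loads such as this one, which the
// vectorizer may turn into a masked gather (e.g. VPGATHERDD under AVX2).
void gatherInts(const int *Table, const int *Idx, int *Out, int N) {
  for (int I = 0; I < N; ++I)
    Out[I] = Table[Idx[I]];   // may become a masked gather when vectorized
}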
1446 }
1447
1448 // This block controls legalization of the mask vector sizes that are
1449 // available with AVX512. 512-bit vectors are in a separate block controlled
1450 // by useAVX512Regs.
1451 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1452 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1453 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1454 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1455 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1456 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1457
1458 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1459 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1460 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1461
1462 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1463 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1464 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1465 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1466 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1467 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1468 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1469 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1470 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1471 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1472 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1473 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1474
1475 // There is no byte sized k-register load or store without AVX512DQ.
1476 if (!Subtarget.hasDQI()) {
1477 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1478 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1479 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1480 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1481
1482 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1483 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1484 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1485 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1486 }
1487
1488 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1489 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1490 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1491 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1492 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1493 }
1494
1495 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1496 setOperationAction(ISD::VSELECT, VT, Expand);
1497
1498 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1499 setOperationAction(ISD::SETCC, VT, Custom);
1500 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1501 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1502 setOperationAction(ISD::SELECT, VT, Custom);
1503 setOperationAction(ISD::TRUNCATE, VT, Custom);
1504
1505 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1506 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1507 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1508 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1509 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1510 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1511 }
1512
1513 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1514 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1515 }
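// Editor's note: illustrative sketch, not part of X86ISelLowering.cpp. With
// the vXi1 mask register classes added above, a vectorized compare-and-select
// like this can keep its condition in a k-register and use masked operations.
void clampNegativesToZero(float *A, int N) {
  for (int I = 0; I < N; ++I)
    if (A[I] < 0.0f)   // vector compare -> vXi1 mask under AVX-512
      A[I] = 0.0f;     // blend / masked store on the selected lanes
}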
1516
1517 // This block controls legalization for 512-bit operations with 32/64 bit
1518 // elements. 512-bit operations can be disabled based on prefer-vector-width and
1519 // required-vector-width function attributes.
1520 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1521 bool HasBWI = Subtarget.hasBWI();
1522
1523 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1524 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1525 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1526 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1527 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1528 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1529
1530 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1531 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1532 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1533 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1534 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1535 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1536 if (HasBWI)
1537 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1538 }
1539
1540 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1541 setOperationAction(ISD::FNEG, VT, Custom);
1542 setOperationAction(ISD::FABS, VT, Custom);
1543 setOperationAction(ISD::FMA, VT, Legal);
1544 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1545 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1546 }
1547
1548 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1549 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1550 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1551 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1552 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1553 }
1554 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1555 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1556 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1557 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1558 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1559 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1560 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1561 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1562
1563 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1564 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1565 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1566 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1567 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1568 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1569 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1570 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1571 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1572 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1573 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1574 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1575
1576 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1577 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1578 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1579 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1580 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1581 if (HasBWI)
1582 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1583
1584 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1585 // to 512-bit rather than use the AVX2 instructions so that we can use
1586 // k-masks.
1587 if (!Subtarget.hasVLX()) {
1588 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1589 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1590 setOperationAction(ISD::MLOAD, VT, Custom);
1591 setOperationAction(ISD::MSTORE, VT, Custom);
1592 }
1593 }
1594
1595 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1596 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1597 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1598 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1599 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1600 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1601 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1602 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1603 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1604 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1605 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1606 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1607 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1608
1609 if (HasBWI) {
1610 // Extends from v64i1 masks to 512-bit vectors.
1611 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1612 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1613 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1614 }
1615
1616 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1617 setOperationAction(ISD::FFLOOR, VT, Legal);
1618 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1619 setOperationAction(ISD::FCEIL, VT, Legal);
1620 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1621 setOperationAction(ISD::FTRUNC, VT, Legal);
1622 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1623 setOperationAction(ISD::FRINT, VT, Legal);
1624 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1625 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1626 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1627 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1628 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1629
1630 setOperationAction(ISD::FROUND, VT, Custom);
1631 }
1632
1633 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1634 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1635 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1636 }
1637
1638 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1639 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1640 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1641 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1642
1643 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1644 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1645 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1646 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1647
1648 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1649 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1650 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1651 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1652 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1653 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1654
1655 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1656 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1657
1658 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1659
1660 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1661 setOperationAction(ISD::SRL, VT, Custom);
1662 setOperationAction(ISD::SHL, VT, Custom);
1663 setOperationAction(ISD::SRA, VT, Custom);
1664 setOperationAction(ISD::SETCC, VT, Custom);
1665
1666 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1667 // setcc all the way to isel and prefer SETGT in some isel patterns.
1668 setCondCodeAction(ISD::SETLT, VT, Custom);
1669 setCondCodeAction(ISD::SETLE, VT, Custom);
1670 }
1671 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1672 setOperationAction(ISD::SMAX, VT, Legal);
1673 setOperationAction(ISD::UMAX, VT, Legal);
1674 setOperationAction(ISD::SMIN, VT, Legal);
1675 setOperationAction(ISD::UMIN, VT, Legal);
1676 setOperationAction(ISD::ABS, VT, Legal);
1677 setOperationAction(ISD::CTPOP, VT, Custom);
1678 setOperationAction(ISD::ROTL, VT, Custom);
1679 setOperationAction(ISD::ROTR, VT, Custom);
1680 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1681 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1682 }
1683
1684 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1685 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1686 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1687 setOperationAction(ISD::CTLZ, VT, Custom);
1688 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1689 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1690 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1691 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1692 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1693 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1694 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1695 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1696 }
1697
1698 if (Subtarget.hasDQI()) {
1699 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1700 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1701 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1702 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1703 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1704 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1705 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1706 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1707
1708 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1709 }
1710
1711 if (Subtarget.hasCDI()) {
1712 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1713 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1714 setOperationAction(ISD::CTLZ, VT, Legal);
1715 }
1716 } // Subtarget.hasCDI()
1717
1718 if (Subtarget.hasVPOPCNTDQ()) {
1719 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1720 setOperationAction(ISD::CTPOP, VT, Legal);
1721 }
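// Editor's note: illustrative sketch, not part of X86ISelLowering.cpp. With
// CTPOP marked Legal for v16i32/v8i64 above, a vectorized population-count
// loop can select VPOPCNTD/VPOPCNTQ instead of a bit-twiddling expansion.
#include <cstdint>
int totalSetBits(const uint32_t *V, int N) {
  int Sum = 0;
  for (int I = 0; I < N; ++I)
    Sum += __builtin_popcount(V[I]);   // per-lane ISD::CTPOP when vectorized
  return Sum;
}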
1722
1723 // Extract subvector is special because the value type
1724 // (result) is 256-bit but the source is 512-bit wide.
1725 // 128-bit was made Legal under AVX1.
1726 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1727 MVT::v8f32, MVT::v4f64 })
1728 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1729
1730 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1731 MVT::v16f32, MVT::v8f64 }) {
1732 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1733 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1734 setOperationAction(ISD::SELECT, VT, Custom);
1735 setOperationAction(ISD::VSELECT, VT, Custom);
1736 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1737 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1738 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1739 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1740 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1741 }
1742
1743 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1744 setOperationAction(ISD::MLOAD, VT, Legal);
1745 setOperationAction(ISD::MSTORE, VT, Legal);
1746 setOperationAction(ISD::MGATHER, VT, Custom);
1747 setOperationAction(ISD::MSCATTER, VT, Custom);
1748 }
1749 if (HasBWI) {
1750 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1751 setOperationAction(ISD::MLOAD, VT, Legal);
1752 setOperationAction(ISD::MSTORE, VT, Legal);
1753 }
1754 } else {
1755 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1756 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1757 }
1758
1759 if (Subtarget.hasVBMI2()) {
1760 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1761 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1762 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1763 setOperationAction(ISD::FSHL, VT, Custom);
1764 setOperationAction(ISD::FSHR, VT, Custom);
1765 }
1766
1767 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1768 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1769 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1770 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1771 }
1772 }// useAVX512Regs
1773
1774 // This block controls legalization for operations that don't have
1775 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1776 // narrower widths.
1777 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1778 // These operations are handled on non-VLX by artificially widening in
1779 // isel patterns.
1780
1781 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1782 Subtarget.hasVLX() ? Legal : Custom);
1783 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1784 Subtarget.hasVLX() ? Legal : Custom);
1785 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1786 Subtarget.hasVLX() ? Legal : Custom);
1787 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1788 Subtarget.hasVLX() ? Legal : Custom);
1789 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1790 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1791 Subtarget.hasVLX() ? Legal : Custom);
1792 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1793 Subtarget.hasVLX() ? Legal : Custom);
1794 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1795 Subtarget.hasVLX() ? Legal : Custom);
1796 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1797 Subtarget.hasVLX() ? Legal : Custom);
1798
1799 if (Subtarget.hasDQI()) {
1800 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1801 // v2f32 UINT_TO_FP is already custom under SSE2.
1802 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
1803 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
1804 "Unexpected operation action!");
1805 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1806 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1807 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1808 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1809 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1810 }
1811
1812 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1813 setOperationAction(ISD::SMAX, VT, Legal);
1814 setOperationAction(ISD::UMAX, VT, Legal);
1815 setOperationAction(ISD::SMIN, VT, Legal);
1816 setOperationAction(ISD::UMIN, VT, Legal);
1817 setOperationAction(ISD::ABS, VT, Legal);
1818 }
1819
1820 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1821 setOperationAction(ISD::ROTL, VT, Custom);
1822 setOperationAction(ISD::ROTR, VT, Custom);
1823 }
1824
1825 // Custom legalize 2x32 to get a little better code.
1826 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1827 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1828
1829 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1830 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1831 setOperationAction(ISD::MSCATTER, VT, Custom);
1832
1833 if (Subtarget.hasDQI()) {
1834 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1835 setOperationAction(ISD::SINT_TO_FP, VT,
1836 Subtarget.hasVLX() ? Legal : Custom);
1837 setOperationAction(ISD::UINT_TO_FP, VT,
1838 Subtarget.hasVLX() ? Legal : Custom);
1839 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1840 Subtarget.hasVLX() ? Legal : Custom);
1841 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1842 Subtarget.hasVLX() ? Legal : Custom);
1843 setOperationAction(ISD::FP_TO_SINT, VT,
1844 Subtarget.hasVLX() ? Legal : Custom);
1845 setOperationAction(ISD::FP_TO_UINT, VT,
1846 Subtarget.hasVLX() ? Legal : Custom);
1847 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1848 Subtarget.hasVLX() ? Legal : Custom);
1849 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1850 Subtarget.hasVLX() ? Legal : Custom);
1851 setOperationAction(ISD::MUL, VT, Legal);
1852 }
1853 }
1854
1855 if (Subtarget.hasCDI()) {
1856 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1857 setOperationAction(ISD::CTLZ, VT, Legal);
1858 }
1859 } // Subtarget.hasCDI()
1860
1861 if (Subtarget.hasVPOPCNTDQ()) {
1862 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1863 setOperationAction(ISD::CTPOP, VT, Legal);
1864 }
1865 }
1866
1867 // This block controls legalization of v32i1/v64i1, which are available with
1868 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1869 // useBWIRegs.
1870 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1871 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1872 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1873
1874 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1875 setOperationAction(ISD::VSELECT, VT, Expand);
1876 setOperationAction(ISD::TRUNCATE, VT, Custom);
1877 setOperationAction(ISD::SETCC, VT, Custom);
1878 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1879 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1880 setOperationAction(ISD::SELECT, VT, Custom);
1881 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1882 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1883 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1884 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1885 }
1886
1887 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1888 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1889
1890 // Extends from v32i1 masks to 256-bit vectors.
1891 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1892 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1893 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1894
1895 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1896 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1897 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1898 }
1899
1900 // These operations are handled on non-VLX by artificially widening in
1901 // isel patterns.
1902 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1903
1904 if (Subtarget.hasBITALG()) {
1905 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1906 setOperationAction(ISD::CTPOP, VT, Legal);
1907 }
1908 }
1909
1910 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
1911 auto setGroup = [&] (MVT VT) {
1912 setOperationAction(ISD::FADD, VT, Legal);
1913 setOperationAction(ISD::STRICT_FADD, VT, Legal);
1914 setOperationAction(ISD::FSUB, VT, Legal);
1915 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
1916 setOperationAction(ISD::FMUL, VT, Legal);
1917 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
1918 setOperationAction(ISD::FDIV, VT, Legal);
1919 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
1920 setOperationAction(ISD::FSQRT, VT, Legal);
1921 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
1922
1923 setOperationAction(ISD::FFLOOR, VT, Legal);
1924 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1925 setOperationAction(ISD::FCEIL, VT, Legal);
1926 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1927 setOperationAction(ISD::FTRUNC, VT, Legal);
1928 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1929 setOperationAction(ISD::FRINT, VT, Legal);
1930 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1931 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1932 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1933
1934 setOperationAction(ISD::LOAD, VT, Legal);
1935 setOperationAction(ISD::STORE, VT, Legal);
1936
1937 setOperationAction(ISD::FMA, VT, Legal);
1938 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1939 setOperationAction(ISD::VSELECT, VT, Legal);
1940 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1941 setOperationAction(ISD::SELECT, VT, Custom);
1942
1943 setOperationAction(ISD::FNEG, VT, Custom);
1944 setOperationAction(ISD::FABS, VT, Custom);
1945 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1946 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1947 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1948 };
1949
1950 // AVX512_FP16 scalar operations
1951 setGroup(MVT::f16);
1952 addRegisterClass(MVT::f16, &X86::FR16XRegClass);
1953 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
1954 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
1955 setOperationAction(ISD::SETCC, MVT::f16, Custom);
1956 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
1957 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
1958 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
1959 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
1960 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
1961 if (isTypeLegal(MVT::f80)) {
1962 setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
1963 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
1964 }
1965
1966 setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
1967 setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
1968
1969 if (Subtarget.useAVX512Regs()) {
1970 setGroup(MVT::v32f16);
1971 addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
1972 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
1973 setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
1974 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
1975 setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
1976 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
1977 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
1978 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
1979 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
1980
1981 setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
1982 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
1983 setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
1984 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
1985 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
1986 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
1987 MVT::v32i16);
1988 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
1989 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
1990 MVT::v32i16);
1991 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
1992 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
1993 MVT::v32i16);
1994 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
1995 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
1996 MVT::v32i16);
1997
1998 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
1999 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
2000 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
2001
2002 setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
2003 setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
2004
2005 setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
2006 setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
2007 }
2008
2009 if (Subtarget.hasVLX()) {
2010 addRegisterClass(MVT::v8f16, &X86::VR128XRegClass);
2011 addRegisterClass(MVT::v16f16, &X86::VR256XRegClass);
2012 setGroup(MVT::v8f16);
2013 setGroup(MVT::v16f16);
2014
2015 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
2016 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
2017 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
2018 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
2019 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
2020 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
2021 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
2022 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
2023 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
2024 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
2025
2026 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
2027 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
2028 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
2029 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
2030 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
2031 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
2032
2033 // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
2034 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
2035 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
2036
2037 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
2038 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
2039 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
2040
2041 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
2042 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
2043 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
2044 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
2045
2046 // Need to custom widen these to prevent scalarization.
2047 setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
2048 setOperationAction(ISD::STORE, MVT::v4f16, Custom);
2049 }
2050
2051 // Support fp16 0 immediate
2052 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
2053 }
2054
2055 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2056 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
2057 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
2058 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
2059 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
2060 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
2061
2062 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
2063 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
2064 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
2065 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
2066 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
2067
2068 if (Subtarget.hasBWI()) {
2069 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
2070 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
2071 }
2072
2073 if (Subtarget.hasFP16()) {
2074 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
2075 setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
2076 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
2077 setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
2078 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
2079 setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
2080 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
2081 setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
2082 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
2083 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
2084 setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
2085 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
2086 setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
2087 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
2088 setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
2089 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
2090 setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
2091 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
2092 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
2093 setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
2094 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
2095 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
2096 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
2097 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
2098 setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
2099 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
2100 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
2101 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
2102 }
2103
2104 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
2105 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
2106 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
2107 }
2108
2109 if (Subtarget.hasAMXTILE()) {
2110 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
2111 }
2112
2113 // We want to custom lower some of our intrinsics.
2114 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
2115 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
2116 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
2117 if (!Subtarget.is64Bit()) {
2118 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
2119 }
2120
2121 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
2122 // handle type legalization for these operations here.
2123 //
2124 // FIXME: We really should do custom legalization for addition and
2125 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
2126 // than generic legalization for 64-bit multiplication-with-overflow, though.
2127 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2128 if (VT == MVT::i64 && !Subtarget.is64Bit())
2129 continue;
2130 // Add/Sub/Mul with overflow operations are custom lowered.
2131 setOperationAction(ISD::SADDO, VT, Custom);
2132 setOperationAction(ISD::UADDO, VT, Custom);
2133 setOperationAction(ISD::SSUBO, VT, Custom);
2134 setOperationAction(ISD::USUBO, VT, Custom);
2135 setOperationAction(ISD::SMULO, VT, Custom);
2136 setOperationAction(ISD::UMULO, VT, Custom);
2137
2138 // Support carry in as value rather than glue.
2139 setOperationAction(ISD::ADDCARRY, VT, Custom);
2140 setOperationAction(ISD::SUBCARRY, VT, Custom);
2141 setOperationAction(ISD::SETCCCARRY, VT, Custom);
2142 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
2143 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
2144 }
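// Editor's note: illustrative sketch, not part of X86ISelLowering.cpp. The
// overflow opcodes configured above are what clang's checked-arithmetic
// builtins lower to; on x86 the boolean result comes from the CPU flags
// rather than a separate compare.
#include <cstdint>
bool checkedAdd(int64_t A, int64_t B, int64_t &Out) {
  return __builtin_add_overflow(A, B, &Out);   // ISD::SADDO on 64-bit targets
}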
2145
2146 if (!Subtarget.is64Bit()) {
2147 // These libcalls are not available in 32-bit.
2148 setLibcallName(RTLIB::SHL_I128, nullptr);
2149 setLibcallName(RTLIB::SRL_I128, nullptr);
2150 setLibcallName(RTLIB::SRA_I128, nullptr);
2151 setLibcallName(RTLIB::MUL_I128, nullptr);
2152 setLibcallName(RTLIB::MULO_I128, nullptr);
2153 }
2154
2155 // Combine sin / cos into _sincos_stret if it is available.
2156 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
2157 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
2158 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
2159 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
2160 }
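// Editor's note: illustrative sketch, not part of X86ISelLowering.cpp. The
// FSINCOS custom lowering above targets call patterns like this, where sin
// and cos of the same argument may fold into one _sincos_stret call when that
// libcall is available.
#include <cmath>
void polarToCartesian(float R, float Theta, float &X, float &Y) {
  X = R * std::cos(Theta);   // same argument as the sin below ...
  Y = R * std::sin(Theta);   // ... so the pair can combine into one sincos call
}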
2161
2162 if (Subtarget.isTargetWin64()) {
2163 setOperationAction(ISD::SDIV, MVT::i128, Custom);
2164 setOperationAction(ISD::UDIV, MVT::i128, Custom);
2165 setOperationAction(ISD::SREM, MVT::i128, Custom);
2166 setOperationAction(ISD::UREM, MVT::i128, Custom);
2167 }
2168
2169 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
2170 // is. We should promote the value to 64-bits to solve this.
2171 // This is what the CRT headers do - `fmodf` is an inline header
2172 // function casting to f64 and calling `fmod`.
2173 if (Subtarget.is32Bit() &&
2174 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
2175 for (ISD::NodeType Op :
2176 {ISD::FCEIL, ISD::STRICT_FCEIL,
2177 ISD::FCOS, ISD::STRICT_FCOS,
2178 ISD::FEXP, ISD::STRICT_FEXP,
2179 ISD::FFLOOR, ISD::STRICT_FFLOOR,
2180 ISD::FREM, ISD::STRICT_FREM,
2181 ISD::FLOG, ISD::STRICT_FLOG,
2182 ISD::FLOG10, ISD::STRICT_FLOG10,
2183 ISD::FPOW, ISD::STRICT_FPOW,
2184 ISD::FSIN, ISD::STRICT_FSIN})
2185 if (isOperationExpand(Op, MVT::f32))
2186 setOperationAction(Op, MVT::f32, Promote);
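// Editor's note: illustrative sketch, not part of X86ISelLowering.cpp; the
// helper name is hypothetical. The Promote action above has roughly the same
// effect as what the MSVC CRT headers do for float fmodf: compute in double
// and narrow the result back to float.
#include <cmath>
static float fmodfViaDouble(float X, float Y) {
  return static_cast<float>(std::fmod(static_cast<double>(X),
                                      static_cast<double>(Y)));
}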
2187
2188 // We have target-specific dag combine patterns for the following nodes:
2189 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
2190 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
2191 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
2192 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
2193 setTargetDAGCombine(ISD::CONCAT_VECTORS);
2194 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
2195 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
2196 setTargetDAGCombine(ISD::BITCAST);
2197 setTargetDAGCombine(ISD::VSELECT);
2198 setTargetDAGCombine(ISD::SELECT);
2199 setTargetDAGCombine(ISD::SHL);
2200 setTargetDAGCombine(ISD::SRA);
2201 setTargetDAGCombine(ISD::SRL);
2202 setTargetDAGCombine(ISD::OR);
2203 setTargetDAGCombine(ISD::AND);
2204 setTargetDAGCombine(ISD::ADD);
2205 setTargetDAGCombine(ISD::FADD);
2206 setTargetDAGCombine(ISD::FSUB);
2207 setTargetDAGCombine(ISD::FNEG);
2208 setTargetDAGCombine(ISD::FMA);
2209 setTargetDAGCombine(ISD::STRICT_FMA);
2210 setTargetDAGCombine(ISD::FMINNUM);
2211 setTargetDAGCombine(ISD::FMAXNUM);
2212 setTargetDAGCombine(ISD::SUB);
2213 setTargetDAGCombine(ISD::LOAD);
2214 setTargetDAGCombine(ISD::MLOAD);
2215 setTargetDAGCombine(ISD::STORE);
2216 setTargetDAGCombine(ISD::MSTORE);
2217 setTargetDAGCombine(ISD::TRUNCATE);
2218 setTargetDAGCombine(ISD::ZERO_EXTEND);
2219 setTargetDAGCombine(ISD::ANY_EXTEND);
2220 setTargetDAGCombine(ISD::SIGN_EXTEND);
2221 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
2222 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
2223 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
2224 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
2225 setTargetDAGCombine(ISD::SINT_TO_FP);
2226 setTargetDAGCombine(ISD::UINT_TO_FP);
2227 setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
2228 setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
2229 setTargetDAGCombine(ISD::SETCC);
2230 setTargetDAGCombine(ISD::MUL);
2231 setTargetDAGCombine(ISD::XOR);
2232 setTargetDAGCombine(ISD::MSCATTER);
2233 setTargetDAGCombine(ISD::MGATHER);
2234 setTargetDAGCombine(ISD::FP16_TO_FP);
2235 setTargetDAGCombine(ISD::FP_EXTEND);
2236 setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
2237 setTargetDAGCombine(ISD::FP_ROUND);
2238
2239 computeRegisterProperties(Subtarget.getRegisterInfo());
2240
2241 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2242 MaxStoresPerMemsetOptSize = 8;
2243 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2244 MaxStoresPerMemcpyOptSize = 4;
2245 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2246 MaxStoresPerMemmoveOptSize = 4;
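// Editor's note: illustrative sketch, not part of X86ISelLowering.cpp. The
// limits above cap how many stores an inlined @llvm.memset/@llvm.memcpy
// expansion may emit before falling back to a library call; a small
// fixed-size copy like this is a typical candidate for inline expansion.
#include <cstring>
void copyHeader(char *Dst, const char *Src) {
  std::memcpy(Dst, Src, 32);   // small constant size -> expanded to wide stores
}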
2247
2248 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2249 // that needs to be benchmarked and balanced with the potential use of vector
2250 // load/store types (PR33329, PR33914).
2251 MaxLoadsPerMemcmp = 2;
2252 MaxLoadsPerMemcmpOptSize = 2;
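// Editor's note: illustrative sketch, not part of X86ISelLowering.cpp. The
// memcmp limits above bound the CodeGenPrepare expansion of small fixed-size
// comparisons such as this one into a couple of wide loads and compares.
#include <cstring>
bool sixteenBytesEqual(const char *A, const char *B) {
  return std::memcmp(A, B, 16) == 0;   // at most MaxLoadsPerMemcmp loads per side
}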
2253
2254 // Default loop alignment, which can be overridden by -align-loops.
2255 setPrefLoopAlignment(Align(16));
2256
2257 // An out-of-order CPU can speculatively execute past a predictable branch,
2258 // but a conditional move could be stalled by an expensive earlier operation.
2259 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2260 EnableExtLdPromotion = true;
2261 setPrefFunctionAlignment(Align(16));
2262
2263 verifyIntrinsicTables();
2264
2265 // Default to having -disable-strictnode-mutation on
2266 IsStrictFPEnabled = true;
2267}
2268
2269// This has so far only been implemented for 64-bit MachO.
2270bool X86TargetLowering::useLoadStackGuardNode() const {
2271 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2272}
2273
2274bool X86TargetLowering::useStackGuardXorFP() const {
2275 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2276 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2277}
2278
2279SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2280 const SDLoc &DL) const {
2281 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2282 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2283 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2284 return SDValue(Node, 0);
2285}
2286
2287TargetLoweringBase::LegalizeTypeAction
2288X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2289 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2290 !Subtarget.hasBWI())
2291 return TypeSplitVector;
2292
2293 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2294 VT.getVectorElementType() != MVT::i1)
2295 return TypeWidenVector;
2296
2297 return TargetLoweringBase::getPreferredVectorAction(VT);
2298}
2299
2300static std::pair<MVT, unsigned>
2301handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2302 const X86Subtarget &Subtarget) {
2303 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2304 // convention is one that uses k registers.
2305 if (NumElts == 2)
2306 return {MVT::v2i64, 1};
2307 if (NumElts == 4)
2308 return {MVT::v4i32, 1};
2309 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2310 CC != CallingConv::Intel_OCL_BI)
2311 return {MVT::v8i16, 1};
2312 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2313 CC != CallingConv::Intel_OCL_BI)
2314 return {MVT::v16i8, 1};
2315 // v32i1 passes in ymm unless we have BWI and the calling convention is
2316 // regcall.
2317 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2318 return {MVT::v32i8, 1};
2319 // Split v64i1 vectors if we don't have v64i8 available.
2320 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2321 if (Subtarget.useAVX512Regs())
2322 return {MVT::v64i8, 1};
2323 return {MVT::v32i8, 2};
2324 }
2325
2326 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2327 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2328 NumElts > 64)
2329 return {MVT::i8, NumElts};
2330
2331 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2332}
2333
2334MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2335 CallingConv::ID CC,
2336 EVT VT) const {
2337 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2338 Subtarget.hasAVX512()) {
2339 unsigned NumElts = VT.getVectorNumElements();
2340
2341 MVT RegisterVT;
2342 unsigned NumRegisters;
2343 std::tie(RegisterVT, NumRegisters) =
2344 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2345 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2346 return RegisterVT;
2347 }
2348
2349 // v3f16 will be widened to v4f16. But we don't assign a register class for v4f16.
2350 // So its default register type is f16. We override the type to v8f16 here.
2351 if (VT == MVT::v3f16 && Subtarget.hasFP16())
2352 return MVT::v8f16;
2353
2354 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2355}
2356
2357unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2358 CallingConv::ID CC,
2359 EVT VT) const {
2360 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2361 Subtarget.hasAVX512()) {
2362 unsigned NumElts = VT.getVectorNumElements();
2363
2364 MVT RegisterVT;
2365 unsigned NumRegisters;
2366 std::tie(RegisterVT, NumRegisters) =
2367 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2368 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2369 return NumRegisters;
2370 }
2371
2372 // v3f16 will be widened to v4f16. But we don't assign a register class for v4f16.
2373 // So its default register number is 3. We override the number to 1 here.
2374 if (VT == MVT::v3f16 && Subtarget.hasFP16())
2375 return 1;
2376
2377 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2378}
2379
2380unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2381 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2382 unsigned &NumIntermediates, MVT &RegisterVT) const {
2383 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2384 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2385 Subtarget.hasAVX512() &&
2386 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2387 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2388 VT.getVectorNumElements() > 64)) {
2389 RegisterVT = MVT::i8;
2390 IntermediateVT = MVT::i1;
2391 NumIntermediates = VT.getVectorNumElements();
2392 return NumIntermediates;
2393 }
2394
2395 // Split v64i1 vectors if we don't have v64i8 available.
2396 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2397 CC != CallingConv::X86_RegCall) {
2398 RegisterVT = MVT::v32i8;
2399 IntermediateVT = MVT::v32i1;
2400 NumIntermediates = 2;
2401 return 2;
2402 }
2403
2404 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2405 NumIntermediates, RegisterVT);
2406}
2407
2408EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2409 LLVMContext& Context,
2410 EVT VT) const {
2411 if (!VT.isVector())
2412 return MVT::i8;
2413
2414 if (Subtarget.hasAVX512()) {
2415 // Figure out what this type will be legalized to.
2416 EVT LegalVT = VT;
2417 while (getTypeAction(Context, LegalVT) != TypeLegal)
2418 LegalVT = getTypeToTransformTo(Context, LegalVT);
2419
2420 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2421 if (LegalVT.getSimpleVT().is512BitVector())
2422 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2423
2424 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2425 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2426 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2427 // vXi16/vXi8.
2428 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2429 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2430 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2431 }
2432 }
2433
2434 return VT.changeVectorElementTypeToInteger();
2435}
2436
2437/// Helper for getByValTypeAlignment to determine
2438/// the desired ByVal argument alignment.
2439static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2440 if (MaxAlign == 16)
2441 return;
2442 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2443 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2444 MaxAlign = Align(16);
2445 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2446 Align EltAlign;
2447 getMaxByValAlign(ATy->getElementType(), EltAlign);
2448 if (EltAlign > MaxAlign)
2449 MaxAlign = EltAlign;
2450 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2451 for (auto *EltTy : STy->elements()) {
2452 Align EltAlign;
2453 getMaxByValAlign(EltTy, EltAlign);
2454 if (EltAlign > MaxAlign)
2455 MaxAlign = EltAlign;
2456 if (MaxAlign == 16)
2457 break;
2458 }
2459 }
2460}
2461
2462/// Return the desired alignment for ByVal aggregate
2463/// function arguments in the caller parameter area. For X86, aggregates
2464/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2465/// are at 4-byte boundaries.
2466unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2467 const DataLayout &DL) const {
2468 if (Subtarget.is64Bit()) {
2469 // Max of 8 and alignment of type.
2470 Align TyAlign = DL.getABITypeAlign(Ty);
2471 if (TyAlign > 8)
2472 return TyAlign.value();
2473 return 8;
2474 }
2475
2476 Align Alignment(4);
2477 if (Subtarget.hasSSE1())
2478 getMaxByValAlign(Ty, Alignment);
2479 return Alignment.value();
2480}
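
A minimal standalone sketch of the byval-alignment rule implemented by getMaxByValAlign/getByValTypeAlignment above, assuming the caller already knows the type's ABI alignment and whether it (recursively) contains a 128-bit vector. Illustrative only, not LLVM API.

#include <algorithm>
#include <cstdio>

static unsigned sketchByValAlign(bool Is64Bit, bool HasSSE1,
                                 unsigned ABITypeAlign,
                                 bool Contains128BitVector) {
  if (Is64Bit)
    return std::max(8u, ABITypeAlign);   // max of 8 and the type's alignment
  if (HasSSE1 && Contains128BitVector)
    return 16;                           // SSE aggregates go to 16-byte boundaries
  return 4;                              // everything else stays at 4
}

int main() {
  std::printf("%u\n", sketchByValAlign(false, true, 4, true));  // 16
  std::printf("%u\n", sketchByValAlign(true, true, 4, false));  // 8
}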
2481
2482/// It returns EVT::Other if the type should be determined using generic
2483/// target-independent logic.
2484/// For vector ops we check that the overall size isn't larger than our
2485/// preferred vector width.
2486EVT X86TargetLowering::getOptimalMemOpType(
2487 const MemOp &Op, const AttributeList &FuncAttributes) const {
2488 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
2489 if (Op.size() >= 16 &&
2490 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2491 // FIXME: Check if unaligned 64-byte accesses are slow.
2492 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2493 (Subtarget.getPreferVectorWidth() >= 512)) {
2494 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2495 }
2496 // FIXME: Check if unaligned 32-byte accesses are slow.
2497 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2498 (Subtarget.getPreferVectorWidth() >= 256)) {
2499 // Although this isn't a well-supported type for AVX1, we'll let
2500 // legalization and shuffle lowering produce the optimal codegen. If we
2501 // choose an optimal type with a vector element larger than a byte,
2502 // getMemsetStores() may create an intermediate splat (using an integer
2503 // multiply) before we splat as a vector.
2504 return MVT::v32i8;
2505 }
2506 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2507 return MVT::v16i8;
2508 // TODO: Can SSE1 handle a byte vector?
2509 // If we have SSE1 registers we should be able to use them.
2510 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2511 (Subtarget.getPreferVectorWidth() >= 128))
2512 return MVT::v4f32;
2513 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2514 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2515 // Do not use f64 to lower memcpy if source is string constant. It's
2516 // better to use i32 to avoid the loads.
2517 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2518 // The gymnastics of splatting a byte value into an XMM register and then
2519 // only using 8-byte stores (because this is a CPU with slow unaligned
2520 // 16-byte accesses) makes that a loser.
2521 return MVT::f64;
2522 }
2523 }
2524 // This is a compromise. If we reach here, unaligned accesses may be slow on
2525 // this target. However, creating smaller, aligned accesses could be even
2526 // slower and would certainly be a lot more code.
2527 if (Subtarget.is64Bit() && Op.size() >= 8)
2528 return MVT::i64;
2529 return MVT::i32;
2530}
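
A minimal standalone decision-tree sketch of the memop-type choice above. The SSE1-only v4f32 path and the 32-bit f64 memcpy/memset special case are omitted for brevity; feature flags are plain booleans and the returned strings stand in for MVTs.

#include <cstdio>

struct MemOpCtx {
  unsigned Size;        // bytes to copy/set
  bool Aligned16;       // known 16-byte aligned
  bool Unaligned16Fast; // !isUnalignedMem16Slow()
  bool NoImplicitFloat, HasAVX512BW, HasAVX512, HasAVX, HasSSE2, Is64Bit;
  unsigned PreferWidth; // preferred vector width in bits
};

static const char *sketchOptimalMemOpType(const MemOpCtx &C) {
  if (!C.NoImplicitFloat && C.Size >= 16 && (C.Unaligned16Fast || C.Aligned16)) {
    if (C.Size >= 64 && C.HasAVX512 && C.PreferWidth >= 512)
      return C.HasAVX512BW ? "v64i8" : "v16i32";
    if (C.Size >= 32 && C.HasAVX && C.PreferWidth >= 256)
      return "v32i8";
    if (C.HasSSE2 && C.PreferWidth >= 128)
      return "v16i8";
  }
  return (C.Is64Bit && C.Size >= 8) ? "i64" : "i32"; // scalar fallback
}

int main() {
  MemOpCtx C{128, true, true, false, true, true, true, true, true, 512};
  std::printf("%s\n", sketchOptimalMemOpType(C)); // v64i8
}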
2531
2532bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2533 if (VT == MVT::f32)
2534 return X86ScalarSSEf32;
2535 if (VT == MVT::f64)
2536 return X86ScalarSSEf64;
2537 return true;
2538}
2539
2540bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2541 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
2542 bool *Fast) const {
2543 if (Fast) {
2544 switch (VT.getSizeInBits()) {
2545 default:
2546 // 8-byte and under are always assumed to be fast.
2547 *Fast = true;
2548 break;
2549 case 128:
2550 *Fast = !Subtarget.isUnalignedMem16Slow();
2551 break;
2552 case 256:
2553 *Fast = !Subtarget.isUnalignedMem32Slow();
2554 break;
2555 // TODO: What about AVX-512 (512-bit) accesses?
2556 }
2557 }
2558 // NonTemporal vector memory ops must be aligned.
2559 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2560 // NT loads can only be vector aligned, so if it's less aligned than the
2561 // minimum vector size (which we can split the vector down to), we might as
2562 // well use a regular unaligned vector load.
2563 // We don't have any NT loads pre-SSE41.
2564 if (!!(Flags & MachineMemOperand::MOLoad))
2565 return (Alignment < 16 || !Subtarget.hasSSE41());
2566 return false;
2567 }
2568 // Misaligned accesses of any size are always allowed.
2569 return true;
2570}
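
A minimal standalone sketch of the misaligned-access policy above: the "fast" answer by access size, plus the non-temporal (NT) vector special case. Illustrative only; not the LLVM hook signature.

#include <cstdio>

struct MisalignedAnswer { bool Allowed; bool Fast; };

static MisalignedAnswer sketchAllowsMisaligned(unsigned SizeInBits, unsigned Align,
                                               bool IsNTVector, bool IsLoad,
                                               bool Unaligned16Slow,
                                               bool Unaligned32Slow, bool HasSSE41) {
  bool Fast = true;                       // 8 bytes and under: always fast
  if (SizeInBits == 128) Fast = !Unaligned16Slow;
  if (SizeInBits == 256) Fast = !Unaligned32Slow;

  if (IsNTVector) {
    // NT stores must be aligned; under-aligned (or pre-SSE4.1) NT loads fall
    // back to a regular unaligned vector load, so they stay "allowed".
    bool Allowed = IsLoad && (Align < 16 || !HasSSE41);
    return {Allowed, Fast};
  }
  return {true, Fast};                    // misaligned accesses otherwise allowed
}

int main() {
  MisalignedAnswer A = sketchAllowsMisaligned(128, 8, true, true, false, false, true);
  std::printf("allowed=%d fast=%d\n", A.Allowed, A.Fast); // allowed=1 fast=1
}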
2571
2572/// Return the entry encoding for a jump table in the
2573/// current function. The returned value is a member of the
2574/// MachineJumpTableInfo::JTEntryKind enum.
2575unsigned X86TargetLowering::getJumpTableEncoding() const {
2576 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2577 // symbol.
2578 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2579 return MachineJumpTableInfo::EK_Custom32;
2580
2581 // Otherwise, use the normal jump table encoding heuristics.
2582 return TargetLowering::getJumpTableEncoding();
2583}
2584
2585bool X86TargetLowering::useSoftFloat() const {
2586 return Subtarget.useSoftFloat();
2587}
2588
2589void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2590 ArgListTy &Args) const {
2591
2592 // Only relabel X86-32 for C / Stdcall CCs.
2593 if (Subtarget.is64Bit())
2594 return;
2595 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2596 return;
2597 unsigned ParamRegs = 0;
2598 if (auto *M = MF->getFunction().getParent())
2599 ParamRegs = M->getNumberRegisterParameters();
2600
2601 // Mark the first N int arguments as being passed in registers.
2602 for (auto &Arg : Args) {
2603 Type *T = Arg.Ty;
2604 if (T->isIntOrPtrTy())
2605 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2606 unsigned numRegs = 1;
2607 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2608 numRegs = 2;
2609 if (ParamRegs < numRegs)
2610 return;
2611 ParamRegs -= numRegs;
2612 Arg.IsInReg = true;
2613 }
2614 }
2615}
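
A minimal standalone sketch of the inreg marking above: the module-level register-parameter budget is handed out to leading integer/pointer arguments, charging one register for arguments of at most 4 bytes and two for 5-8 bytes. Names are illustrative, not LLVM API.

#include <cstdio>
#include <vector>

struct SketchArg { unsigned SizeInBytes; bool IsIntOrPtr; bool InReg; };

static void sketchMarkInReg(std::vector<SketchArg> &Args, unsigned ParamRegs) {
  for (SketchArg &A : Args) {
    if (!A.IsIntOrPtr || A.SizeInBytes > 8)
      continue;
    unsigned NumRegs = (A.SizeInBytes > 4) ? 2 : 1;
    if (ParamRegs < NumRegs)
      return;                 // budget exhausted; later args stay on the stack
    ParamRegs -= NumRegs;
    A.InReg = true;
  }
}

int main() {
  std::vector<SketchArg> Args = {{4, true, false}, {8, true, false}, {4, true, false}};
  sketchMarkInReg(Args, /*ParamRegs=*/3);     // e.g. built with -mregparm=3
  for (const SketchArg &A : Args)
    std::printf("%d ", A.InReg);              // 1 1 0
}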
2616
2617const MCExpr *
2618X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2619 const MachineBasicBlock *MBB,
2620 unsigned uid,MCContext &Ctx) const{
2621 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
2622 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2623 // entries.
2624 return MCSymbolRefExpr::create(MBB->getSymbol(),
2625 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2626}
2627
2628/// Returns relocation base for the given PIC jumptable.
2629SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2630 SelectionDAG &DAG) const {
2631 if (!Subtarget.is64Bit())
2632 // This doesn't have SDLoc associated with it, but is not really the
2633 // same as a Register.
2634 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2635 getPointerTy(DAG.getDataLayout()));
2636 return Table;
2637}
2638
2639/// This returns the relocation base for the given PIC jumptable,
2640/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2641const MCExpr *X86TargetLowering::
2642getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2643 MCContext &Ctx) const {
2644 // X86-64 uses RIP relative addressing based on the jump table label.
2645 if (Subtarget.isPICStyleRIPRel())
2646 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2647
2648 // Otherwise, the reference is relative to the PIC base.
2649 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2650}
2651
2652std::pair<const TargetRegisterClass *, uint8_t>
2653X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2654 MVT VT) const {
2655 const TargetRegisterClass *RRC = nullptr;
2656 uint8_t Cost = 1;
2657 switch (VT.SimpleTy) {
2658 default:
2659 return TargetLowering::findRepresentativeClass(TRI, VT);
2660 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2661 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2662 break;
2663 case MVT::x86mmx:
2664 RRC = &X86::VR64RegClass;
2665 break;
2666 case MVT::f32: case MVT::f64:
2667 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2668 case MVT::v4f32: case MVT::v2f64:
2669 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2670 case MVT::v8f32: case MVT::v4f64:
2671 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2672 case MVT::v16f32: case MVT::v8f64:
2673 RRC = &X86::VR128XRegClass;
2674 break;
2675 }
2676 return std::make_pair(RRC, Cost);
2677}
2678
2679unsigned X86TargetLowering::getAddressSpace() const {
2680 if (Subtarget.is64Bit())
2681 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2682 return 256;
2683}
2684
2685static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2686 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2687 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2688}
2689
2690static Constant* SegmentOffset(IRBuilderBase &IRB,
2691 int Offset, unsigned AddressSpace) {
2692 return ConstantExpr::getIntToPtr(
2693 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2694 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2695}
2696
2697Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
2698 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2699 // tcbhead_t; use it instead of the usual global variable (see
2700 // sysdeps/{i386,x86_64}/nptl/tls.h)
2701 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2702 if (Subtarget.isTargetFuchsia()) {
2703 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2704 return SegmentOffset(IRB, 0x10, getAddressSpace());
2705 } else {
2706 unsigned AddressSpace = getAddressSpace();
2707 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
2708 // Specifically, some users may customize the base reg and offset.
2709 int Offset = M->getStackProtectorGuardOffset();
2710 // If we don't set -stack-protector-guard-offset value:
2711 // %fs:0x28, unless we're using a Kernel code model, in which case
2712 // it's %gs:0x28. gs:0x14 on i386.
2713 if (Offset == INT_MAX)
2714 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2715
2716 StringRef GuardReg = M->getStackProtectorGuardReg();
2717 if (GuardReg == "fs")
2718 AddressSpace = X86AS::FS;
2719 else if (GuardReg == "gs")
2720 AddressSpace = X86AS::GS;
2721 return SegmentOffset(IRB, Offset, AddressSpace);
2722 }
2723 }
2724 return TargetLowering::getIRStackGuard(IRB);
2725}
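
A minimal standalone sketch of the glibc/bionic TLS stack-guard slot chosen above, returning a (segment address space, offset) pair; 256 and 257 are the GS and FS address spaces LLVM uses on x86 (an assumption stated here, not shown in this excerpt). The Fuchsia and guard-register-override paths are left out.

#include <cstdio>
#include <utility>

static std::pair<unsigned, unsigned>
sketchStackGuardSlot(bool Is64Bit, bool KernelCodeModel, int CustomOffset /*-1 = none*/) {
  unsigned AddrSpace = Is64Bit ? (KernelCodeModel ? 256u /*GS*/ : 257u /*FS*/)
                               : 256u /*GS*/;
  unsigned Offset = (CustomOffset >= 0) ? unsigned(CustomOffset)
                                        : (Is64Bit ? 0x28u : 0x14u);
  return {AddrSpace, Offset};
}

int main() {
  auto S = sketchStackGuardSlot(/*Is64Bit=*/true, /*Kernel=*/false, /*CustomOffset=*/-1);
  std::printf("addrspace(%u) offset 0x%x\n", S.first, S.second); // %fs:0x28
}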
2726
2727void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2728 // MSVC CRT provides functionalities for stack protection.
2729 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2730 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2731 // MSVC CRT has a global variable holding security cookie.
2732 M.getOrInsertGlobal("__security_cookie",
2733 Type::getInt8PtrTy(M.getContext()));
2734
2735 // MSVC CRT has a function to validate security cookie.
2736 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2737 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2738 Type::getInt8PtrTy(M.getContext()));
2739 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2740 F->setCallingConv(CallingConv::X86_FastCall);
2741 F->addParamAttr(0, Attribute::AttrKind::InReg);
2742 }
2743 return;
2744 }
2745
2746 StringRef GuardMode = M.getStackProtectorGuard();
2747
2748 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2749 if ((GuardMode == "tls" || GuardMode.empty()) &&
2750 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2751 return;
2752 TargetLowering::insertSSPDeclarations(M);
2753}
2754
2755Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2756 // MSVC CRT has a global variable holding security cookie.
2757 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2758 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2759 return M.getGlobalVariable("__security_cookie");
2760 }
2761 return TargetLowering::getSDagStackGuard(M);
2762}
2763
2764Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2765 // MSVC CRT has a function to validate security cookie.
2766 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2767 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2768 return M.getFunction("__security_check_cookie");
2769 }
2770 return TargetLowering::getSSPStackGuardCheck(M);
2771}
2772
2773Value *
2774X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
2775 if (Subtarget.getTargetTriple().isOSContiki())
2776 return getDefaultSafeStackPointerLocation(IRB, false);
2777
2778 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2779 // definition of TLS_SLOT_SAFESTACK in
2780 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2781 if (Subtarget.isTargetAndroid()) {
2782 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2783 // %gs:0x24 on i386
2784 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2785 return SegmentOffset(IRB, Offset, getAddressSpace());
2786 }
2787
2788 // Fuchsia is similar.
2789 if (Subtarget.isTargetFuchsia()) {
2790 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2791 return SegmentOffset(IRB, 0x18, getAddressSpace());
2792 }
2793
2794 return TargetLowering::getSafeStackPointerLocation(IRB);
2795}
2796
2797//===----------------------------------------------------------------------===//
2798// Return Value Calling Convention Implementation
2799//===----------------------------------------------------------------------===//
2800
2801bool X86TargetLowering::CanLowerReturn(
2802 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2803 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2804 SmallVector<CCValAssign, 16> RVLocs;
2805 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2806 return CCInfo.CheckReturn(Outs, RetCC_X86);
2807}
2808
2809const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2810 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2811 return ScratchRegs;
2812}
2813
2814 /// Lowers mask values (v*i1) to the local register values
2815/// \returns DAG node after lowering to register type
2816static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2817 const SDLoc &Dl, SelectionDAG &DAG) {
2818 EVT ValVT = ValArg.getValueType();
2819
2820 if (ValVT == MVT::v1i1)
2821 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2822 DAG.getIntPtrConstant(0, Dl));
2823
2824 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2825 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2826 // Two stage lowering might be required
2827 // bitcast: v8i1 -> i8 / v16i1 -> i16
2828 // anyextend: i8 -> i32 / i16 -> i32
2829 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2830 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2831 if (ValLoc == MVT::i32)
2832 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2833 return ValToCopy;
2834 }
2835
2836 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2837 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2838 // One stage lowering is required
2839 // bitcast: v32i1 -> i32 / v64i1 -> i64
2840 return DAG.getBitcast(ValLoc, ValArg);
2841 }
2842
2843 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2844}
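
A minimal standalone illustration of the two-stage lowering above for a v16i1 mask returned in an i32 location: pack the lanes into an i16 (the bitcast), then widen to i32 (the anyextend).

#include <cstdint>
#include <cstdio>

static uint32_t sketchLowerV16i1ToI32(const bool (&Mask)[16]) {
  uint16_t Packed = 0;                       // bitcast: v16i1 -> i16 (lane i -> bit i)
  for (unsigned I = 0; I != 16; ++I)
    Packed |= uint16_t(Mask[I]) << I;
  return uint32_t(Packed);                   // anyextend: i16 -> i32
}

int main() {
  bool Mask[16] = {true, false, true};       // lanes 0 and 2 set, rest clear
  std::printf("0x%x\n", sketchLowerV16i1ToI32(Mask)); // 0x5
}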
2845
2846 /// Breaks a v64i1 value into two registers and adds the new nodes to the DAG
2847static void Passv64i1ArgInRegs(
2848 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2849 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
2850 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2851 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
2852 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2853 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
2854 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2855        "The value should reside in two registers");
2856
2857 // Before splitting the value we cast it to i64
2858 Arg = DAG.getBitcast(MVT::i64, Arg);
2859
2860 // Splitting the value into two i32 types
2861 SDValue Lo, Hi;
2862 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2863 DAG.getConstant(0, Dl, MVT::i32));
2864 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2865 DAG.getConstant(1, Dl, MVT::i32));
2866
2867 // Attach the two i32 types into corresponding registers
2868 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2869 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2870}
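
A minimal standalone illustration of the split above: the v64i1 value, once bitcast to i64, is handed out as two i32 register halves (lo in the first assigned register, hi in the next).

#include <cstdint>
#include <cstdio>
#include <utility>

static std::pair<uint32_t, uint32_t> sketchSplitV64i1(uint64_t MaskBits) {
  uint32_t Lo = uint32_t(MaskBits);          // EXTRACT_ELEMENT 0
  uint32_t Hi = uint32_t(MaskBits >> 32);    // EXTRACT_ELEMENT 1
  return {Lo, Hi};
}

int main() {
  auto Halves = sketchSplitV64i1(0xFFFF000000000001ULL);
  std::printf("lo=0x%x hi=0x%x\n", Halves.first, Halves.second); // lo=0x1 hi=0xffff0000
}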
2871
2872SDValue
2873X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2874 bool isVarArg,
2875 const SmallVectorImpl<ISD::OutputArg> &Outs,
2876 const SmallVectorImpl<SDValue> &OutVals,
2877 const SDLoc &dl, SelectionDAG &DAG) const {
2878 MachineFunction &MF = DAG.getMachineFunction();
2879 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2880
2881 // In some cases we need to disable registers from the default CSR list.
2882 // For example, when they are used for argument passing.
2883 bool ShouldDisableCalleeSavedRegister =
2884 CallConv == CallingConv::X86_RegCall ||
2885 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2886
2887 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2888 report_fatal_error("X86 interrupts may not return any value");
2889
2890 SmallVector<CCValAssign, 16> RVLocs;
2891 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2892 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2893
2894 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
2895 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2896 ++I, ++OutsIndex) {
2897 CCValAssign &VA = RVLocs[I];
2898 assert(VA.isRegLoc() && "Can only return in registers!");
2899
2900 // Add the register to the CalleeSaveDisableRegs list.
2901 if (ShouldDisableCalleeSavedRegister)
2902 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2903
2904 SDValue ValToCopy = OutVals[OutsIndex];
2905 EVT ValVT = ValToCopy.getValueType();
2906
2907 // Promote values to the appropriate types.
2908 if (VA.getLocInfo() == CCValAssign::SExt)
2909 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2910 else if (VA.getLocInfo() == CCValAssign::ZExt)
2911 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2912 else if (VA.getLocInfo() == CCValAssign::AExt) {
2913 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2914 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2915 else
2916 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2917 }
2918 else if (VA.getLocInfo() == CCValAssign::BCvt)
2919 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2920
2921 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2922        "Unexpected FP-extend for return value.");
2923
2924 // Report an error if we have attempted to return a value via an XMM
2925 // register and SSE was disabled.
2926 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
2927 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2928 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2929 } else if (!Subtarget.hasSSE2() &&
2930 X86::FR64XRegClass.contains(VA.getLocReg()) &&
2931 ValVT == MVT::f64) {
2932 // When returning a double via an XMM register, report an error if SSE2 is
2933 // not enabled.
2934 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2935 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2936 }
2937
2938 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2939 // the RET instruction and handled by the FP Stackifier.
2940 if (VA.getLocReg() == X86::FP0 ||
2941 VA.getLocReg() == X86::FP1) {
2942 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2943 // change the value to the FP stack register class.
2944 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2945 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2946 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2947 // Don't emit a copytoreg.
2948 continue;
2949 }
2950
2951 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2952 // which is returned in RAX / RDX.
2953 if (Subtarget.is64Bit()) {
2954 if (ValVT == MVT::x86mmx) {
2955 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2956 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2957 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2958 ValToCopy);
2959 // If we don't have SSE2 available, convert to v4f32 so the generated
2960 // register is legal.
2961 if (!Subtarget.hasSSE2())
2962 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2963 }
2964 }
2965 }
2966
2967 if (VA.needsCustom()) {
2968 assert(VA.getValVT() == MVT::v64i1 &&
2969        "Currently the only custom case is when we split v64i1 to 2 regs");
2970
2971 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
2972 Subtarget);
2973
2974 // Add the second register to the CalleeSaveDisableRegs list.
2975 if (ShouldDisableCalleeSavedRegister)
2976 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2977 } else {
2978 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2979 }
2980 }
2981
2982 SDValue Flag;
2983 SmallVector<SDValue, 6> RetOps;
2984 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2985 // Operand #1 = Bytes To Pop
2986 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2987 MVT::i32));
2988
2989 // Copy the result values into the output registers.
2990 for (auto &RetVal : RetVals) {
2991 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
2992 RetOps.push_back(RetVal.second);
2993 continue; // Don't emit a copytoreg.
2994 }
2995
2996 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
2997 Flag = Chain.getValue(1);
2998 RetOps.push_back(
2999 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
3000 }
3001
3002 // Swift calling convention does not require we copy the sret argument
3003 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
3004
3005 // All x86 ABIs require that for returning structs by value we copy
3006 // the sret argument into %rax/%eax (depending on ABI) for the return.
3007 // We saved the argument into a virtual register in the entry block,
3008 // so now we copy the value out and into %rax/%eax.
3009 //
3010 // Checking Function.hasStructRetAttr() here is insufficient because the IR
3011 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
3012 // false, then an sret argument may be implicitly inserted in the SelDAG. In
3013 // either case FuncInfo->setSRetReturnReg() will have been called.
3014 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3015 // When we have both sret and another return value, we should use the
3016 // original Chain stored in RetOps[0], instead of the current Chain updated
3017 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
3018
3019 // For the case of sret and another return value, we have
3020 // Chain_0 at the function entry
3021 // Chain_1 = getCopyToReg(Chain_0) in the above loop
3022 // If we use Chain_1 in getCopyFromReg, we will have
3023 // Val = getCopyFromReg(Chain_1)
3024 // Chain_2 = getCopyToReg(Chain_1, Val) from below
3025
3026 // getCopyToReg(Chain_0) will be glued together with
3027 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
3028 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
3029 // Data dependency from Unit B to Unit A due to usage of Val in
3030 // getCopyToReg(Chain_1, Val)
3031 // Chain dependency from Unit A to Unit B
3032
3033 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
3034 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
3035 getPointerTy(MF.getDataLayout()));
3036
3037 Register RetValReg
3038 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
3039 X86::RAX : X86::EAX;
3040 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
3041 Flag = Chain.getValue(1);
3042
3043 // RAX/EAX now acts like a return value.
3044 RetOps.push_back(
3045 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
3046
3047 // Add the returned register to the CalleeSaveDisableRegs list.
3048 if (ShouldDisableCalleeSavedRegister)
3049 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
3050 }
3051
3052 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3053 const MCPhysReg *I =
3054 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3055 if (I) {
3056 for (; *I; ++I) {
3057 if (X86::GR64RegClass.contains(*I))
3058 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3059 else
3060 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3061 }
3062 }
3063
3064 RetOps[0] = Chain; // Update chain.
3065
3066 // Add the flag if we have it.
3067 if (Flag.getNode())
3068 RetOps.push_back(Flag);
3069
3070 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
3071 if (CallConv == CallingConv::X86_INTR)
3072 opcode = X86ISD::IRET;
3073 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
3074}
3075
3076bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3077 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
3078 return false;
3079
3080 SDValue TCChain = Chain;
3081 SDNode *Copy = *N->use_begin();
3082 if (Copy->getOpcode() == ISD::CopyToReg) {
3083 // If the copy has a glue operand, we conservatively assume it isn't safe to
3084 // perform a tail call.
3085 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3086 return false;
3087 TCChain = Copy->getOperand(0);
3088 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
3089 return false;
3090
3091 bool HasRet = false;
3092 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
3093 UI != UE; ++UI) {
3094 if (UI->getOpcode() != X86ISD::RET_FLAG)
3095 return false;
3096 // If we are returning more than one value, we can definitely
3097 // not make a tail call; see PR19530.
3098 if (UI->getNumOperands() > 4)
3099 return false;
3100 if (UI->getNumOperands() == 4 &&
3101 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
3102 return false;
3103 HasRet = true;
3104 }
3105
3106 if (!HasRet)
3107 return false;
3108
3109 Chain = TCChain;
3110 return true;
3111}
3112
3113EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
3114 ISD::NodeType ExtendKind) const {
3115 MVT ReturnMVT = MVT::i32;
3116
3117 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
3118 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
3119 // The ABI does not require i1, i8 or i16 to be extended.
3120 //
3121 // On Darwin, there is code in the wild relying on Clang's old behaviour of
3122 // always extending i8/i16 return values, so keep doing that for now.
3123 // (PR26665).
3124 ReturnMVT = MVT::i8;
3125 }
3126
3127 EVT MinVT = getRegisterType(Context, ReturnMVT);
3128 return VT.bitsLT(MinVT) ? MinVT : VT;
3129}
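
A minimal standalone sketch of the extended-return-type rule above: i1 may always stay i8, and i8/i16 also stay narrow except on Darwin, where they keep being widened to i32 for compatibility (PR26665). Bit widths stand in for MVTs.

#include <algorithm>
#include <cstdio>

static unsigned sketchExtReturnBits(unsigned ValueBits, bool IsDarwin) {
  unsigned MinBits = 32;                                   // default ReturnMVT = i32
  if (ValueBits == 1 || (!IsDarwin && (ValueBits == 8 || ValueBits == 16)))
    MinBits = 8;                                           // ABI doesn't require extension
  return std::max(ValueBits, MinBits);                     // VT.bitsLT(MinVT) ? MinVT : VT
}

int main() {
  std::printf("%u %u\n", sketchExtReturnBits(8, /*IsDarwin=*/false),  // 8
                         sketchExtReturnBits(8, /*IsDarwin=*/true));  // 32
}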
3130
3131/// Reads two 32 bit registers and creates a 64 bit mask value.
3132 /// \param VA The current 32 bit value that needs to be assigned.
3133 /// \param NextVA The next 32 bit value that needs to be assigned.
3134/// \param Root The parent DAG node.
3135 /// \param [in,out] InFlag Represents the SDValue in the parent DAG node used
3136 ///                        for glue purposes. If the DAG is already using a
3137 ///                        physical register instead of a virtual one, we
3138 ///                        should glue our new SDValue to the InFlag SDValue.
3139 /// \return a new SDValue of size 64 bits.
3140static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
3141 SDValue &Root, SelectionDAG &DAG,
3142 const SDLoc &Dl, const X86Subtarget &Subtarget,
3143 SDValue *InFlag = nullptr) {
3144 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
3145 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
3146 assert(VA.getValVT() == MVT::v64i1 &&
3147        "Expecting first location of 64 bit width type");
3148 assert(NextVA.getValVT() == VA.getValVT() &&
3149        "The locations should have the same type");
3150 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
3151        "The values should reside in two registers");
3152
3153 SDValue Lo, Hi;
3154 SDValue ArgValueLo, ArgValueHi;
3155
3156 MachineFunction &MF = DAG.getMachineFunction();
3157 const TargetRegisterClass *RC = &X86::GR32RegClass;
3158
3159 // Read a 32 bit value from the registers.
3160 if (nullptr == InFlag) {
3161 // When no physical register is present,
3162 // create an intermediate virtual register.
3163 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3164 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3165 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3166 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
3167 } else {
3168 // When a physical register is available read the value from it and glue
3169 // the reads together.
3170 ArgValueLo =
3171 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
3172 *InFlag = ArgValueLo.getValue(2);
3173 ArgValueHi =
3174 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
3175 *InFlag = ArgValueHi.getValue(2);
3176 }
3177
3178 // Convert the i32 type into v32i1 type.
3179 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
3180
3181 // Convert the i32 type into v32i1 type.
3182 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
3183
3184 // Concatenate the two values together.
3185 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
3186}
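
A minimal standalone illustration of the rebuild above: the two i32 halves read from registers are each viewed as a v32i1 and concatenated into a v64i1, i.e. hi:lo recombined into one 64-bit mask.

#include <cstdint>
#include <cstdio>

static uint64_t sketchRebuildV64i1(uint32_t Lo, uint32_t Hi) {
  // bitcast i32 -> v32i1 twice, then CONCAT_VECTORS(Lo, Hi) == (Hi << 32) | Lo.
  return (uint64_t(Hi) << 32) | uint64_t(Lo);
}

int main() {
  std::printf("0x%llx\n",
              (unsigned long long)sketchRebuildV64i1(0x1u, 0xFFFF0000u));
}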
3187
3188/// The function will lower a register of various sizes (8/16/32/64)
3189/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
3190 /// \returns a DAG node containing the operand after lowering to the mask type.
3191static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
3192 const EVT &ValLoc, const SDLoc &Dl,
3193 SelectionDAG &DAG) {
3194 SDValue ValReturned = ValArg;
3195
3196 if (ValVT == MVT::v1i1)
3197 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3198
3199 if (ValVT == MVT::v64i1) {
3200 // On a 32 bit machine, this case is handled by getv64i1Argument
3201 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
3202 // On a 64 bit machine, there is no need to truncate the value, only bitcast it
3203 } else {
3204 MVT maskLen;
3205 switch (ValVT.getSimpleVT().SimpleTy) {
3206 case MVT::v8i1:
3207 maskLen = MVT::i8;
3208 break;
3209 case MVT::v16i1:
3210 maskLen = MVT::i16;
3211 break;
3212 case MVT::v32i1:
3213 maskLen = MVT::i32;
3214 break;
3215 default:
3216 llvm_unreachable("Expecting a vector of i1 types");
3217 }
3218
3219 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3220 }
3221 return DAG.getBitcast(ValVT, ValReturned);
3222}
3223
3224/// Lower the result values of a call into the
3225/// appropriate copies out of appropriate physical registers.
3226///
3227SDValue X86TargetLowering::LowerCallResult(
3228 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3229 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3230 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3231 uint32_t *RegMask) const {
3232
3233 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3234 // Assign locations to each value returned by this call.
3235 SmallVector<CCValAssign, 16> RVLocs;
3236 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3237 *DAG.getContext());
3238 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3239
3240 // Copy all of the result registers out of their specified physreg.
3241 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3242 ++I, ++InsIndex) {
3243 CCValAssign &VA = RVLocs[I];
3244 EVT CopyVT = VA.getLocVT();
3245
3246 // In some calling conventions we need to remove the used registers
3247 // from the register mask.
3248 if (RegMask) {
3249 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3250 SubRegs.isValid(); ++SubRegs)
3251 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3252 }
3253
3254 // Report an error if there was an attempt to return FP values via XMM
3255 // registers.
3256 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3257 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3258 if (VA.getLocReg() == X86::XMM1)
3259 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3260 else
3261 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3262 } else if (!Subtarget.hasSSE2() &&
3263 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3264 CopyVT == MVT::f64) {
3265 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3266 if (VA.getLocReg() == X86::XMM1)
3267 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3268 else
3269 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3270 }
3271
3272 // If we prefer to use the value in xmm registers, copy it out as f80 and
3273 // use a truncate to move it from fp stack reg to xmm reg.
3274 bool RoundAfterCopy = false;
3275 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3276 isScalarFPTypeInSSEReg(VA.getValVT())) {
3277 if (!Subtarget.hasX87())
3278 report_fatal_error("X87 register return with X87 disabled");
3279 CopyVT = MVT::f80;
3280 RoundAfterCopy = (CopyVT != VA.getLocVT());
3281 }
3282
3283 SDValue Val;
3284 if (VA.needsCustom()) {
3285 assert(VA.getValVT() == MVT::v64i1 &&
3286        "Currently the only custom case is when we split v64i1 to 2 regs");
3287 Val =
3288 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3289 } else {
3290 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3291 .getValue(1);
3292 Val = Chain.getValue(0);
3293 InFlag = Chain.getValue(2);
3294 }
3295
3296 if (RoundAfterCopy)
3297 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3298 // This truncation won't change the value.
3299 DAG.getIntPtrConstant(1, dl));
3300
3301 if (VA.isExtInLoc()) {
3302 if (VA.getValVT().isVector() &&
3303 VA.getValVT().getScalarType() == MVT::i1 &&
3304 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3305 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3306 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3307 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3308 } else
3309 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3310 }
3311
3312 if (VA.getLocInfo() == CCValAssign::BCvt)
3313 Val = DAG.getBitcast(VA.getValVT(), Val);
3314
3315 InVals.push_back(Val);
3316 }
3317
3318 return Chain;
3319}
3320
3321//===----------------------------------------------------------------------===//
3322// C & StdCall & Fast Calling Convention implementation
3323//===----------------------------------------------------------------------===//
3324// StdCall calling convention seems to be standard for many Windows' API
3325// routines and around. It differs from C calling convention just a little:
3326// callee should clean up the stack, not caller. Symbols should be also
3327// decorated in some fancy way :) It doesn't support any vector arguments.
3328// For info on fast calling convention see Fast Calling Convention (tail call)
3329// implementation LowerX86_32FastCCCallTo.
3330
3331/// CallIsStructReturn - Determines whether a call uses struct return
3332/// semantics.
3333enum StructReturnType {
3334 NotStructReturn,
3335 RegStructReturn,
3336 StackStructReturn
3337};
3338static StructReturnType
3339callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
3340 if (Outs.empty())
3341 return NotStructReturn;
3342
3343 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
3344 if (!Flags.isSRet())
3345 return NotStructReturn;
3346 if (Flags.isInReg() || IsMCU)
3347 return RegStructReturn;
3348 return StackStructReturn;
3349}
3350
3351/// Determines whether a function uses struct return semantics.
3352static StructReturnType
3353argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
3354 if (Ins.empty())
3355 return NotStructReturn;
3356
3357 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
3358 if (!Flags.isSRet())
3359 return NotStructReturn;
3360 if (Flags.isInReg() || IsMCU)
3361 return RegStructReturn;
3362 return StackStructReturn;
3363}
3364
3365/// Make a copy of an aggregate at address specified by "Src" to address
3366/// "Dst" with size and alignment information specified by the specific
3367/// parameter attribute. The copy will be passed as a byval function parameter.
3368static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3369 SDValue Chain, ISD::ArgFlagsTy Flags,
3370 SelectionDAG &DAG, const SDLoc &dl) {
3371 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3372
3373 return DAG.getMemcpy(
3374 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3375 /*isVolatile*/ false, /*AlwaysInline=*/true,
3376 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3377}
3378
3379/// Return true if the calling convention is one that we can guarantee TCO for.
3380static bool canGuaranteeTCO(CallingConv::ID CC) {
3381 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3382 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3383 CC == CallingConv::HHVM || CC == CallingConv::Tail ||
3384 CC == CallingConv::SwiftTail);
3385}
3386
3387/// Return true if we might ever do TCO for calls with this calling convention.
3388static bool mayTailCallThisCC(CallingConv::ID CC) {
3389 switch (CC) {
3390 // C calling conventions:
3391 case CallingConv::C:
3392 case CallingConv::Win64:
3393 case CallingConv::X86_64_SysV:
3394 // Callee pop conventions:
3395 case CallingConv::X86_ThisCall:
3396 case CallingConv::X86_StdCall:
3397 case CallingConv::X86_VectorCall:
3398 case CallingConv::X86_FastCall:
3399 // Swift:
3400 case CallingConv::Swift:
3401 return true;
3402 default:
3403 return canGuaranteeTCO(CC);
3404 }
3405}
3406
3407/// Return true if the function is being made into a tailcall target by
3408/// changing its ABI.
3409static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3410 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
3411 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
3412}
3413
3414bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3415 if (!CI->isTailCall())
3416 return false;
3417
3418 CallingConv::ID CalleeCC = CI->getCallingConv();
3419 if (!mayTailCallThisCC(CalleeCC))
3420 return false;
3421
3422 return true;
3423}
3424
3425SDValue
3426X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3427 const SmallVectorImpl<ISD::InputArg> &Ins,
3428 const SDLoc &dl, SelectionDAG &DAG,
3429 const CCValAssign &VA,
3430 MachineFrameInfo &MFI, unsigned i) const {
3431 // Create the nodes corresponding to a load from this parameter slot.
3432 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3433 bool AlwaysUseMutable = shouldGuaranteeTCO(
3434 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3435 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3436 EVT ValVT;
3437 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3438
3439 // If value is passed by pointer we have address passed instead of the value
3440 // itself. No need to extend if the mask value and location share the same
3441 // absolute size.
3442 bool ExtendedInMem =
3443 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3444 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3445
3446 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3447 ValVT = VA.getLocVT();
3448 else
3449 ValVT = VA.getValVT();
3450
3451 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3452 // changed with more analysis.
3453 // In case of tail call optimization, mark all arguments mutable, since they
3454 // could be overwritten by the lowering of arguments in case of a tail call.
3455 if (Flags.isByVal()) {
3456 unsigned Bytes = Flags.getByValSize();
3457 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3458
3459 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3460 // can be improved with deeper analysis.
3461 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3462 /*isAliased=*/true);
3463 return DAG.getFrameIndex(FI, PtrVT);
3464 }
3465
3466 EVT ArgVT = Ins[i].ArgVT;
3467
3468 // If this is a vector that has been split into multiple parts, and the
3469 // scalar size of the parts doesn't match the vector element size, then we can't
3470 // elide the copy. The parts will have padding between them instead of being
3471 // packed like a vector.
3472 bool ScalarizedAndExtendedVector =
3473 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3474 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3475
3476 // This is an argument in memory. We might be able to perform copy elision.
3477 // If the argument is passed directly in memory without any extension, then we
3478 // can perform copy elision. Large vector types, for example, may be passed
3479 // indirectly by pointer.
3480 if (Flags.isCopyElisionCandidate() &&
3481 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3482 !ScalarizedAndExtendedVector) {
3483 SDValue PartAddr;
3484 if (Ins[i].PartOffset == 0) {
3485 // If this is a one-part value or the first part of a multi-part value,
3486 // create a stack object for the entire argument value type and return a
3487 // load from our portion of it. This assumes that if the first part of an
3488 // argument is in memory, the rest will also be in memory.
3489 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3490 /*IsImmutable=*/false);
3491 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3492 return DAG.getLoad(
3493 ValVT, dl, Chain, PartAddr,
3494 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3495 } else {
3496 // This is not the first piece of an argument in memory. See if there is
3497 // already a fixed stack object including this offset. If so, assume it
3498 // was created by the PartOffset == 0 branch above and create a load from
3499 // the appropriate offset into it.
3500 int64_t PartBegin = VA.getLocMemOffset();
3501 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3502 int FI = MFI.getObjectIndexBegin();
3503 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3504 int64_t ObjBegin = MFI.getObjectOffset(FI);
3505 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3506 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3507 break;
3508 }
3509 if (MFI.isFixedObjectIndex(FI)) {
3510 SDValue Addr =
3511 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3512 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3513 return DAG.getLoad(
3514 ValVT, dl, Chain, Addr,
3515 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3516 Ins[i].PartOffset));
3517 }
3518 }
3519 }
3520
3521 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3522 VA.getLocMemOffset(), isImmutable);
3523
3524 // Set SExt or ZExt flag.
3525 if (VA.getLocInfo() == CCValAssign::ZExt) {
3526 MFI.setObjectZExt(FI, true);
3527 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3528 MFI.setObjectSExt(FI, true);
3529 }
3530
3531 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3532 SDValue Val = DAG.getLoad(
3533 ValVT, dl, Chain, FIN,
3534 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3535 return ExtendedInMem
3536 ? (VA.getValVT().isVector()
3537 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3538 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3539 : Val;
3540}
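
A minimal standalone sketch of the fixed-object search used for copy elision above: for a later part of a split argument, find an existing stack object whose byte range covers the part's range so a load at PartOffset into it can be reused. Frame indices are just vector positions here; illustrative only.

#include <cstdint>
#include <cstdio>
#include <vector>

struct FixedObj { int64_t Begin; int64_t Size; };

static int sketchFindCoveringObject(const std::vector<FixedObj> &Objs,
                                    int64_t PartBegin, int64_t PartEnd) {
  for (int FI = 0, E = (int)Objs.size(); FI != E; ++FI) {
    int64_t ObjBegin = Objs[FI].Begin;
    int64_t ObjEnd = ObjBegin + Objs[FI].Size;
    if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
      return FI;                       // reuse this object instead of making a new one
  }
  return -1;                           // no match: fall through to CreateFixedObject
}

int main() {
  std::vector<FixedObj> Objs = {{0, 16}, {16, 32}};
  std::printf("%d\n", sketchFindCoveringObject(Objs, 24, 32)); // 1
}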
3541
3542// FIXME: Get this from tablegen.
3543static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3544 const X86Subtarget &Subtarget) {
3545 assert(Subtarget.is64Bit());
3546
3547 if (Subtarget.isCallingConvWin64(CallConv)) {
3548 static const MCPhysReg GPR64ArgRegsWin64[] = {
3549 X86::RCX, X86::RDX, X86::R8, X86::R9
3550 };
3551 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3552 }
3553
3554 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3555 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3556 };
3557 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3558}
3559
3560// FIXME: Get this from tablegen.
3561static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3562 CallingConv::ID CallConv,
3563 const X86Subtarget &Subtarget) {
3564 assert(Subtarget.is64Bit());
3565 if (Subtarget.isCallingConvWin64(CallConv)) {
3566 // The XMM registers which might contain var arg parameters are shadowed
3567 // in their paired GPR. So we only need to save the GPR to their home
3568 // slots.
3569 // TODO: __vectorcall will change this.
3570 return None;
3571 }
3572
3573 bool isSoftFloat = Subtarget.useSoftFloat();
3574 if (isSoftFloat || !Subtarget.hasSSE1())
3575 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3576 // registers.
3577 return None;
3578
3579 static const MCPhysReg XMMArgRegs64Bit[] = {
3580 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3581 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3582 };
3583 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3584}
3585
3586#ifndef NDEBUG
3587static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3588 return llvm::is_sorted(
3589 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3590 return A.getValNo() < B.getValNo();
3591 });
3592}
3593#endif
3594
3595namespace {
3596/// This is a helper class for lowering variable arguments parameters.
3597class VarArgsLoweringHelper {
3598public:
3599 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3600 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3601 CallingConv::ID CallConv, CCState &CCInfo)
3602 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3603 TheMachineFunction(DAG.getMachineFunction()),
3604 TheFunction(TheMachineFunction.getFunction()),
3605 FrameInfo(TheMachineFunction.getFrameInfo()),
3606 FrameLowering(*Subtarget.getFrameLowering()),
3607 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3608 CCInfo(CCInfo) {}
3609
3610 // Lower variable argument parameters.
3611 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3612
3613private:
3614 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3615
3616 void forwardMustTailParameters(SDValue &Chain);
3617
3618 bool is64Bit() const { return Subtarget.is64Bit(); }
3619 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3620
3621 X86MachineFunctionInfo *FuncInfo;
3622 const SDLoc &DL;
3623 SelectionDAG &DAG;
3624 const X86Subtarget &Subtarget;
3625 MachineFunction &TheMachineFunction;
3626 const Function &TheFunction;
3627 MachineFrameInfo &FrameInfo;
3628 const TargetFrameLowering &FrameLowering;
3629 const TargetLowering &TargLowering;
3630 CallingConv::ID CallConv;
3631 CCState &CCInfo;
3632};
3633} // namespace
3634
3635void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3636 SDValue &Chain, unsigned StackSize) {
3637 // If the function takes a variable number of arguments, make a frame index for
3638 // the start of the first vararg value... for expansion of llvm.va_start. We
3639 // can skip this if there are no va_start calls.
3640 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3641 CallConv != CallingConv::X86_ThisCall)) {
3642 FuncInfo->setVarArgsFrameIndex(
3643 FrameInfo.CreateFixedObject(1, StackSize, true));
3644 }
3645
3646 // 64-bit calling conventions support varargs and register parameters, so we
3647 // have to do extra work to spill them in the prologue.
3648 if (is64Bit()) {
3650 // Find the first unallocated argument registers (GPR and XMM).
3650 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3651 ArrayRef<MCPhysReg> ArgXMMs =
3652 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3653 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3654 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3655
3656 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3657 "SSE register cannot be used when SSE is disabled!");
3658
3659 if (isWin64()) {
3660 // Get to the caller-allocated home save location. Add 8 to account
3661 // for the return address.
3662 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3663 FuncInfo->setRegSaveFrameIndex(
3664 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
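// Illustrative note (not in the original source): under the Win64 ABI the four
// 8-byte home slots shadow RCX, RDX, R8 and R9, so if, say, NumIntRegs == 2,
// the register save index lands on the R8 home slot, 16 bytes into the
// caller-allocated 32-byte shadow area.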
3665 // Fixup to set vararg frame on shadow area (4 x i64).
3666 if (NumIntRegs < 4)
3667 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3668 } else {
3669 // For X86-64, if there are vararg parameters that are passed via
3670 // registers, then we must store them to their spots on the stack so
3671 // they may be loaded by dereferencing the result of va_next.
3672 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3673 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3674 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3675 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3676 }
3677
3678 SmallVector<SDValue, 6>
3679 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3680 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3681 // keeping live input value
3682 SDValue ALVal; // if applicable keeps SDValue for %al register
3683
3684 // Gather all the live in physical registers.
3685 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3686 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3687 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3688 }
3689 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3690 if (!AvailableXmms.empty()) {
3691 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3692 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3693 for (MCPhysReg Reg : AvailableXmms) {
3694 // The fast register allocator spills virtual registers at basic
3695 // block boundaries, which leads to uses of XMM registers
3696 // outside the %al check. Pass physical registers to
3697 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
3698 TheMachineFunction.getRegInfo().addLiveIn(Reg);
3699 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
3700 }
3701 }
3702
3703 // Store the integer parameter registers.
3704 SmallVector<SDValue, 8> MemOps;
3705 SDValue RSFIN =
3706 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3707 TargLowering.getPointerTy(DAG.getDataLayout()));
3708 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3709 for (SDValue Val : LiveGPRs) {
3710 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3711 TargLowering.getPointerTy(DAG.getDataLayout()),
3712 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3713 SDValue Store =
3714 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3715 MachinePointerInfo::getFixedStack(
3716 DAG.getMachineFunction(),
3717 FuncInfo->getRegSaveFrameIndex(), Offset));
3718 MemOps.push_back(Store);
3719 Offset += 8;
3720 }
3721
3722 // Now store the XMM (fp + vector) parameter registers.
3723 if (!LiveXMMRegs.empty()) {
3724 SmallVector<SDValue, 12> SaveXMMOps;
3725 SaveXMMOps.push_back(Chain);
3726 SaveXMMOps.push_back(ALVal);
3727 SaveXMMOps.push_back(
3728 DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
3729 SaveXMMOps.push_back(
3730 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
3731 llvm::append_range(SaveXMMOps, LiveXMMRegs);
3732 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
3733 MVT::Other, SaveXMMOps));
3734 }
3735
3736 if (!MemOps.empty())
3737 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3738 }
3739}
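// The non-Win64 path above sizes the SysV x86-64 register save area and the
// va_list offsets. A minimal standalone sketch of that arithmetic (the struct
// and function names below are illustrative, not LLVM API):
struct SysVVarArgOffsets {
  unsigned GPOffset;     // first free slot in the GPR part of the save area
  unsigned FPOffset;     // first free slot in the XMM part of the save area
  unsigned SaveAreaSize; // total register save area size
};
// NumIntRegs / NumXMMRegs are the argument registers already consumed by the
// fixed (named) arguments, as returned by CCState::getFirstUnallocated above.
static SysVVarArgOffsets computeSysVVarArgOffsets(unsigned NumIntRegs,
                                                  unsigned NumXMMRegs) {
  const unsigned NumGPRs = 6; // RDI, RSI, RDX, RCX, R8, R9
  const unsigned NumXMMs = 8; // XMM0..XMM7
  return {NumIntRegs * 8,                // setVarArgsGPOffset
          NumGPRs * 8 + NumXMMRegs * 16, // setVarArgsFPOffset
          NumGPRs * 8 + NumXMMs * 16};   // CreateStackObject size (176 bytes)
}
// E.g. computeSysVVarArgOffsets(2, 1) yields {16, 64, 176}.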
3740
3741void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3742 // Find the largest legal vector type.
3743 MVT VecVT = MVT::Other;
3744 // FIXME: Only some x86_32 calling conventions support AVX512.
3745 if (Subtarget.useAVX512Regs() &&
3746 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3747 CallConv == CallingConv::Intel_OCL_BI)))
3748 VecVT = MVT::v16f32;
3749 else if (Subtarget.hasAVX())
3750 VecVT = MVT::v8f32;
3751 else if (Subtarget.hasSSE2())
3752 VecVT = MVT::v4f32;
3753
3754 // We forward some GPRs and some vector types.
3755 SmallVector<MVT, 2> RegParmTypes;
3756 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3757 RegParmTypes.push_back(IntVT);
3758 if (VecVT != MVT::Other)
3759 RegParmTypes.push_back(VecVT);
3760
3761 // Compute the set of forwarded registers. The rest are scratch.
3762 SmallVectorImpl<ForwardedRegister> &Forwards =
3763 FuncInfo->getForwardedMustTailRegParms();
3764 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3765
3766 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3767 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3768 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3769 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3770 }
3771
3772 // Copy all forwards from physical to virtual registers.
3773 for (ForwardedRegister &FR : Forwards) {
3774 // FIXME: Can we use a less constrained schedule?
3775 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3776 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
3777 TargLowering.getRegClassFor(FR.VT));
3778 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
3779 }
3780}
3781
3782void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
3783 unsigned StackSize) {
3784 // Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
3785 // If necessary, it will be overwritten with the correct value later.
3786 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3787 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3788
3789 if (FrameInfo.hasVAStart())
3790 createVarArgAreaAndStoreRegisters(Chain, StackSize);
3791
3792 if (FrameInfo.hasMustTailInVarArgFunc())
3793 forwardMustTailParameters(Chain);
3794}
3795
3796SDValue X86TargetLowering::LowerFormalArguments(
3797 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3798 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3799 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3800 MachineFunction &MF = DAG.getMachineFunction();
3801 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3802
3803 const Function &F = MF.getFunction();
3804 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3805 F.getName() == "main")
3806 FuncInfo->setForceFramePointer(true);
3807
3808 MachineFrameInfo &MFI = MF.getFrameInfo();
3809 bool Is64Bit = Subtarget.is64Bit();
3810 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3811
3812 assert(
3813 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
3814 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
3815
3816 // Assign locations to all of the incoming arguments.
3817 SmallVector<CCValAssign, 16> ArgLocs;
3818 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3819
3820 // Allocate shadow area for Win64.
3821 if (IsWin64)
3822 CCInfo.AllocateStack(32, Align(8));
3823
3824 CCInfo.AnalyzeArguments(Ins, CC_X86);
3825
3826 // In vectorcall calling convention a second pass is required for the HVA
3827 // types.
3828 if (CallingConv::X86_VectorCall == CallConv) {
3829 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3830 }
3831
3832 // The next loop assumes that the locations are in the same order as the
3833 // input arguments.
3834 assert(isSortedByValueNo(ArgLocs) &&
3835 "Argument Location list must be sorted before lowering");
3836
3837 SDValue ArgValue;
3838 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3839 ++I, ++InsIndex) {
3840 assert(InsIndex < Ins.size() && "Invalid Ins index");
3841 CCValAssign &VA = ArgLocs[I];
3842
3843 if (VA.isRegLoc()) {
3844 EVT RegVT = VA.getLocVT();
3845 if (VA.needsCustom()) {
3846 assert(
3847 VA.getValVT() == MVT::v64i1 &&
3848 "Currently the only custom case is when we split v64i1 to 2 regs");
3849
3850 // In the regcall calling convention, v64i1 values compiled for a
3851 // 32-bit target are split up into two registers.
3852 ArgValue =
3853 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3854 } else {
3855 const TargetRegisterClass *RC;
3856 if (RegVT == MVT::i8)
3857 RC = &X86::GR8RegClass;
3858 else if (RegVT == MVT::i16)
3859 RC = &X86::GR16RegClass;
3860 else if (RegVT == MVT::i32)
3861 RC = &X86::GR32RegClass;
3862 else if (Is64Bit && RegVT == MVT::i64)
3863 RC = &X86::GR64RegClass;
3864 else if (RegVT == MVT::f16)
3865 RC = &X86::FR16XRegClass;
3866 else if (RegVT == MVT::f32)
3867 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3868 else if (RegVT == MVT::f64)
3869 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3870 else if (RegVT == MVT::f80)
3871 RC = &X86::RFP80RegClass;
3872 else if (RegVT == MVT::f128)
3873 RC = &X86::VR128RegClass;
3874 else if (RegVT.is512BitVector())
3875 RC = &X86::VR512RegClass;
3876 else if (RegVT.is256BitVector())
3877 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3878 else if (RegVT.is128BitVector())
3879 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3880 else if (RegVT == MVT::x86mmx)
3881 RC = &X86::VR64RegClass;
3882 else if (RegVT == MVT::v1i1)
3883 RC = &X86::VK1RegClass;
3884 else if (RegVT == MVT::v8i1)
3885 RC = &X86::VK8RegClass;
3886 else if (RegVT == MVT::v16i1)
3887 RC = &X86::VK16RegClass;
3888 else if (RegVT == MVT::v32i1)
3889 RC = &X86::VK32RegClass;
3890 else if (RegVT == MVT::v64i1)
3891 RC = &X86::VK64RegClass;
3892 else
3893 llvm_unreachable("Unknown argument type!");
3894
3895 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3896 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3897 }
3898
3899 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3900 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3901 // right size.
3902 if (VA.getLocInfo() == CCValAssign::SExt)
3903 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3904 DAG.getValueType(VA.getValVT()));
3905 else if (VA.getLocInfo() == CCValAssign::ZExt)
3906 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3907 DAG.getValueType(VA.getValVT()));
3908 else if (VA.getLocInfo() == CCValAssign::BCvt)
3909 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3910
3911 if (VA.isExtInLoc()) {
3912 // Handle MMX values passed in XMM regs.
3913 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3914 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3915 else if (VA.getValVT().isVector() &&
3916 VA.getValVT().getScalarType() == MVT::i1 &&
3917 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3918 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3919 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3920 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3921 } else
3922 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3923 }
3924 } else {
3925 assert(VA.isMemLoc());
3926 ArgValue =
3927 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3928 }
3929
3930 // If the value is passed via a pointer, do a load.
3931 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3932 ArgValue =
3933 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3934
3935 InVals.push_back(ArgValue);
3936 }
3937
3938 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3939 if (Ins[I].Flags.isSwiftAsync()) {
3940 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
3941 if (Subtarget.is64Bit())
3942 X86FI->setHasSwiftAsyncContext(true);
3943 else {
3944 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
3945 X86FI->setSwiftAsyncContextFrameIdx(FI);
3946 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
3947 DAG.getFrameIndex(FI, MVT::i32),
3948 MachinePointerInfo::getFixedStack(MF, FI));
3949 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
3950 }
3951 }
3952
3953 // The Swift calling convention does not require that we copy the sret argument
3954 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3955 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
3956 continue;
3957
3958 // All x86 ABIs require that for returning structs by value we copy the
3959 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3960 // the argument into a virtual register so that we can access it from the
3961 // return points.
3962 if (Ins[I].Flags.isSRet()) {
3963 assert(!FuncInfo->getSRetReturnReg() &&
3964 "SRet return has already been set");
3965 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3966 Register Reg =
3967 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3968 FuncInfo->setSRetReturnReg(Reg);
3969 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3970 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3971 break;
3972 }
3973 }
3974
3975 unsigned StackSize = CCInfo.getNextStackOffset();
3976 // Align stack specially for tail calls.
3977 if (shouldGuaranteeTCO(CallConv,
3978 MF.getTarget().Options.GuaranteedTailCallOpt))
3979 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3980
3981 if (IsVarArg)
3982 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
3983 .lowerVarArgsParameters(Chain, StackSize);
3984
3985 // Some CCs need callee pop.
3986 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
3987 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3988 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3989 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3990 // X86 interrupts must pop the error code (and the alignment padding) if
3991 // present.
3992 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3993 } else {
3994 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3995 // If this is an sret function, the return should pop the hidden pointer.
3996 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3997 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3998 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3999 FuncInfo->setBytesToPopOnReturn(4);
4000 }
4001
4002 if (!Is64Bit) {
4003 // RegSaveFrameIndex is X86-64 only.
4004 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
4005 }
4006
4007 FuncInfo->setArgumentStackSize(StackSize);
4008
4009 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
4010 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
4011 if (Personality == EHPersonality::CoreCLR) {
4012 assert(Is64Bit);
4013 // TODO: Add a mechanism to frame lowering that will allow us to indicate
4014 // that we'd prefer this slot be allocated towards the bottom of the frame
4015 // (i.e. near the stack pointer after allocating the frame). Every
4016 // funclet needs a copy of this slot in its (mostly empty) frame, and the
4017 // offset from the bottom of this and each funclet's frame must be the
4018 // same, so the size of funclets' (mostly empty) frames is dictated by
4019 // how far this slot is from the bottom (since they allocate just enough
4020 // space to accommodate holding this slot at the correct offset).
4021 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
4022 EHInfo->PSPSymFrameIdx = PSPSymFI;
4023 }
4024 }
4025
4026 if (CallConv == CallingConv::X86_RegCall ||
4027 F.hasFnAttribute("no_caller_saved_registers")) {
4028 MachineRegisterInfo &MRI = MF.getRegInfo();
4029 for (std::pair<Register, Register> Pair : MRI.liveins())
4030 MRI.disableCalleeSavedRegister(Pair.first);
4031 }
4032
4033 return Chain;
4034}
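// The callee-pop logic at the end of LowerFormalArguments can be summarised by
// a small hypothetical helper (illustrative only, not LLVM API); it mirrors the
// setBytesToPopOnReturn decisions made above:
static unsigned bytesToPopOnReturn(bool CalleePop, bool IsX86InterruptWithError,
                                   bool Is64Bit, bool PopsHiddenSRetPointer,
                                   unsigned StackSize) {
  if (CalleePop)
    return StackSize;            // callee pops its entire argument area
  if (IsX86InterruptWithError)
    return Is64Bit ? 16 : 4;     // pop the error code (plus alignment padding)
  if (PopsHiddenSRetPointer)
    return 4;                    // 32-bit struct-return hidden pointer
  return 0;                      // callee pops nothing
}
// E.g. a 32-bit sret function on a non-MSVCRT target that cannot guarantee TCO
// gets bytesToPopOnReturn(false, false, false, true, StackSize) == 4.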
4035
4036SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
4037 SDValue Arg, const SDLoc &dl,
4038 SelectionDAG &DAG,
4039 const CCValAssign &VA,
4040 ISD::ArgFlagsTy Flags,
4041 bool isByVal) const {
4042 unsigned LocMemOffset = VA.getLocMemOffset();
4043 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4044 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4045 StackPtr, PtrOff);
4046 if (isByVal)
4047 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
4048
4049 return DAG.getStore(
4050 Chain, dl, Arg, PtrOff,
4051 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
4052}
4053
4054/// Emit a load of the return address if tail call
4055/// optimization is performed and it is required.
4056SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
4057 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
4058 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
4059 // Adjust the Return address stack slot.
4060 EVT VT = getPointerTy(DAG.getDataLayout());
4061 OutRetAddr = getReturnAddressFrameIndex(DAG);
4062
4063 // Load the "old" Return address.
4064 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
4065 return SDValue(OutRetAddr.getNode(), 1);
4066}
4067
4068/// Emit a store of the return address if tail call
4069/// optimization is performed and it is required (FPDiff!=0).
4070static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
4071 SDValue Chain, SDValue RetAddrFrIdx,
4072 EVT PtrVT, unsigned SlotSize,
4073 int FPDiff, const SDLoc &dl) {
4074 // Store the return address to the appropriate stack slot.
4075 if (!FPDiff) return Chain;
4076 // Calculate the new stack slot for the return address.
4077 int NewReturnAddrFI =
4078 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
4079 false);
4080 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
4081 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
4082 MachinePointerInfo::getFixedStack(
4083 DAG.getMachineFunction(), NewReturnAddrFI));
4084 return Chain;
4085}
4086
4087/// Returns a vector_shuffle mask for a movs{s|d} or movd
4088/// operation of the specified width.
4089static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
4090 SDValue V2) {
4091 unsigned NumElems = VT.getVectorNumElements();
4092 SmallVector<int, 8> Mask;
4093 Mask.push_back(NumElems);
4094 for (unsigned i = 1; i != NumElems; ++i)
4095 Mask.push_back(i);
4096 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
4097}
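// getMOVL above builds a mask whose lane 0 comes from V2 and whose remaining
// lanes keep the corresponding lanes of V1 - the MOVSS/MOVSD merge pattern. A
// standalone sketch of the same mask construction (illustrative only):
#include <vector>
static std::vector<int> movlMask(unsigned NumElems) {
  std::vector<int> Mask;
  Mask.push_back(NumElems); // index NumElems selects element 0 of V2
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back(i);      // indices 0..NumElems-1 select elements of V1
  return Mask;
}
// E.g. movlMask(4) == {4, 1, 2, 3}.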
4098
4099SDValue
4100X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4101 SmallVectorImpl<SDValue> &InVals) const {
4102 SelectionDAG &DAG = CLI.DAG;
4103 SDLoc &dl = CLI.DL;
4104 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4105 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4106 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4107 SDValue Chain = CLI.Chain;
4108 SDValue Callee = CLI.Callee;
4109 CallingConv::ID CallConv = CLI.CallConv;
4110 bool &isTailCall = CLI.IsTailCall;
4111 bool isVarArg = CLI.IsVarArg;
4112 const auto *CB = CLI.CB;
4113
4114 MachineFunction &MF = DAG.getMachineFunction();
4115 bool Is64Bit = Subtarget.is64Bit();
4116 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
4117 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
4118 bool IsSibcall = false;
4119 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
4120 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
4121 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
4122 bool HasNCSR = (CB && isa<CallInst>(CB) &&
4123 CB->hasFnAttr("no_caller_saved_registers"));
4124 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
4125 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
4126 const Module *M = MF.getMMI().getModule();
4127 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
4128
4129 MachineFunction::CallSiteInfo CSInfo;
4130 if (CallConv == CallingConv::X86_INTR)
4131 report_fatal_error("X86 interrupts may not be called directly");
4132
4133 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
4134 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
4135 // If we are using a GOT, disable tail calls to external symbols with
4136 // default visibility. Tail calling such a symbol requires using a GOT
4137 // relocation, which forces early binding of the symbol. This breaks code
4138 // that requires lazy function symbol resolution. Using musttail or
4139 // GuaranteedTailCallOpt will override this.
4140 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4141 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
4142 G->getGlobal()->hasDefaultVisibility()))
4143 isTailCall = false;
4144 }
4145
4146
4147 if (isTailCall && !IsMustTail) {
4148 // Check if it's really possible to do a tail call.
4149 isTailCall = IsEligibleForTailCallOptimization(
4150 Callee, CallConv, SR == StackStructReturn, isVarArg, CLI.RetTy, Outs,
4151 OutVals, Ins, DAG);
4152
4153 // Sibcalls are automatically detected tailcalls which do not require
4154 // ABI changes.
4155 if (!IsGuaranteeTCO && isTailCall)
4156 IsSibcall = true;
4157
4158 if (isTailCall)
4159 ++NumTailCalls;
4160 }
4161
4162 if (IsMustTail && !isTailCall)
4163 report_fatal_error("failed to perform tail call elimination on a call "
4164 "site marked musttail");
4165
4166 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
4167 "Var args not supported with calling convention fastcc, ghc or hipe");
4168
4169 // Analyze operands of the call, assigning locations to each operand.
4170 SmallVector<CCValAssign, 16> ArgLocs;
4171 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4172
4173 // Allocate shadow area for Win64.
4174 if (IsWin64)
4175 CCInfo.AllocateStack(32, Align(8));
4176
4177 CCInfo.AnalyzeArguments(Outs, CC_X86);
4178
4179 // In vectorcall calling convention a second pass is required for the HVA
4180 // types.
4181 if (CallingConv::X86_VectorCall == CallConv) {
4182 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
4183 }
4184
4185 // Get a count of how many bytes are to be pushed on the stack.
4186 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
4187 if (IsSibcall)
4188 // This is a sibcall. The memory operands are already available in the
4189 // caller's own caller's stack.
4190 NumBytes = 0;
4191 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
4192 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
4193
4194 int FPDiff = 0;
4195 if (isTailCall &&
4196 shouldGuaranteeTCO(CallConv,
4197 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4198 // Lower arguments at fp - stackoffset + fpdiff.
4199 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
4200
4201 FPDiff = NumBytesCallerPushed - NumBytes;
4202
4203 // Record how far the return-address stack slot moves, keeping the
4204 // largest movement (most negative delta) seen so far.
4205 if (FPDiff < X86Info->getTCReturnAddrDelta())
4206 X86Info->setTCReturnAddrDelta(FPDiff);
4207 }
4208
4209 unsigned NumBytesToPush = NumBytes;
4210 unsigned NumBytesToPop = NumBytes;
4211
4212 // If we have an inalloca argument, all stack space has already been allocated
4213 // for us and is right at the top of the stack. We don't support multiple
4214 // arguments passed in memory when using inalloca.
4215 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4216 NumBytesToPush = 0;
4217 if (!ArgLocs.back().isMemLoc())
4218 report_fatal_error("cannot use inalloca attribute on a register "
4219 "parameter");
4220 if (ArgLocs.back().getLocMemOffset() != 0)
4221 report_fatal_error("any parameter with the inalloca attribute must be "
4222 "the only memory argument");
4223 } else if (CLI.IsPreallocated) {
4224 assert(ArgLocs.back().isMemLoc() &&
4225 "cannot use preallocated attribute on a register "
4226 "parameter");
4227 SmallVector<size_t, 4> PreallocatedOffsets;
4228 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4229 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4230 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4231 }
4232 }
4233 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4234 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4235 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4236 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4237 NumBytesToPush = 0;
4238 }
4239
4240 if (!IsSibcall && !IsMustTail)
4241 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4242 NumBytes - NumBytesToPush, dl);
4243
4244 SDValue RetAddrFrIdx;
4245 // Load return address for tail calls.
4246 if (isTailCall && FPDiff)
4247 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4248 Is64Bit, FPDiff, dl);
4249
4250 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4251 SmallVector<SDValue, 8> MemOpChains;
4252 SDValue StackPtr;
4253
4254 // The next loop assumes that the locations are in the same order as the
4255 // input arguments.
4256 assert(isSortedByValueNo(ArgLocs) &&
4257 "Argument Location list must be sorted before lowering");
4258
4259 // Walk the register/memloc assignments, inserting copies/loads. In the case
4260 // of tail call optimization, arguments are handled later.
4261 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4262 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4263 ++I, ++OutIndex) {
4264 assert(OutIndex < Outs.size() && "Invalid Out index");
4265 // Skip inalloca/preallocated arguments, they have already been written.
4266 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4267 if (Flags.isInAlloca() || Flags.isPreallocated())
4268 continue;
4269
4270 CCValAssign &VA = ArgLocs[I];
4271 EVT RegVT = VA.getLocVT();
4272 SDValue Arg = OutVals[OutIndex];
4273 bool isByVal = Flags.isByVal();
4274
4275 // Promote the value if needed.
4276 switch (VA.getLocInfo()) {
4277 default: llvm_unreachable("Unknown loc info!");
4278 case CCValAssign::Full: break;
4279 case CCValAssign::SExt:
4280 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4281 break;
4282 case CCValAssign::ZExt:
4283 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4284 break;
4285 case CCValAssign::AExt:
4286 if (Arg.getValueType().isVector() &&
4287 Arg.getValueType().getVectorElementType() == MVT::i1)
4288 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4289 else if (RegVT.is128BitVector()) {
4290 // Special case: passing MMX values in XMM registers.
4291 Arg = DAG.getBitcast(MVT::i64, Arg);
4292 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4293 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4294 } else
4295 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4296 break;
4297 case CCValAssign::BCvt:
4298 Arg = DAG.getBitcast(RegVT, Arg);
4299 break;
4300 case CCValAssign::Indirect: {
4301 if (isByVal) {
4302 // Memcpy the argument to a temporary stack slot to prevent
4303 // the caller from seeing any modifications the callee may make
4304 // as guaranteed by the `byval` attribute.
4305 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4306 Flags.getByValSize(),
4307 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4308 SDValue StackSlot =
4309 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4310 Chain =
4311 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4312 // From now on treat this as a regular pointer
4313 Arg = StackSlot;
4314 isByVal = false;
4315 } else {
4316 // Store the argument.
4317 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4318 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4319 Chain = DAG.getStore(
4320 Chain, dl, Arg, SpillSlot,
4321 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4322 Arg = SpillSlot;
4323 }
4324 break;
4325 }
4326 }
4327
4328 if (VA.needsCustom()) {
4329 assert(VA.getValVT() == MVT::v64i1 &&
4330 "Currently the only custom case is when we split v64i1 to 2 regs");
4331 // Split v64i1 value into two registers
4332 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4333 } else if (VA.isRegLoc()) {
4334 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4335 const TargetOptions &Options = DAG.getTarget().Options;
4336 if (Options.EmitCallSiteInfo)
4337 CSInfo.emplace_back(VA.getLocReg(), I);
4338 if (isVarArg && IsWin64) {
4339 // The Win64 ABI requires an argument XMM reg to be copied to the corresponding
4340 // shadow reg if the callee is a varargs function.
4341 Register ShadowReg;
4342 switch (VA.getLocReg()) {
4343 case X86::XMM0: ShadowReg = X86::RCX; break;
4344 case X86::XMM1: ShadowReg = X86::RDX; break;
4345 case X86::XMM2: ShadowReg = X86::R8; break;
4346 case X86::XMM3: ShadowReg = X86::R9; break;
4347 }
4348 if (ShadowReg)
4349 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4350 }
4351 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4352 assert(VA.isMemLoc());
4353 if (!StackPtr.getNode())
4354 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4355 getPointerTy(DAG.getDataLayout()));
4356 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4357 dl, DAG, VA, Flags, isByVal));
4358 }
4359 }
4360
4361 if (!MemOpChains.empty())
4362 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4363
4364 if (Subtarget.isPICStyleGOT()) {
4365 // ELF / PIC requires the GOT pointer to be in the EBX register before
4366 // function calls made via the PLT (except for regcall).
4367 if (!isTailCall) {
4368 // An indirect call with the RegCall calling convention may use up all the
4369 // general registers, so it is not suitable to bind the EBX register for the
4370 // GOT address; just let the register allocator handle it.
4371 if (CallConv != CallingConv::X86_RegCall)
4372 RegsToPass.push_back(std::make_pair(
4373 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4374 getPointerTy(DAG.getDataLayout()))));
4375 } else {
4376 // If we are tail calling and generating PIC/GOT style code load the
4377 // address of the callee into ECX. The value in ecx is used as target of
4378 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4379 // for tail calls on PIC/GOT architectures. Normally we would just put the
4380 // address of GOT into ebx and then call target@PLT. But for tail calls
4381 // ebx would be restored (since ebx is callee saved) before jumping to the
4382 // target@PLT.
4383
4384 // Note: The actual moving to ECX is done further down.
4385 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4386 if (G && !G->getGlobal()->hasLocalLinkage() &&
4387 G->getGlobal()->hasDefaultVisibility())
4388 Callee = LowerGlobalAddress(Callee, DAG);
4389 else if (isa<ExternalSymbolSDNode>(Callee))
4390 Callee = LowerExternalSymbol(Callee, DAG);
4391 }
4392 }
4393
4394 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
4395 // From AMD64 ABI document:
4396 // For calls that may call functions that use varargs or stdargs
4397 // (prototype-less calls or calls to functions containing ellipsis (...) in
4398 // the declaration) %al is used as hidden argument to specify the number
4399 // of SSE registers used. The contents of %al do not need to match exactly
4400 // the number of registers, but must be an upper bound on the number of SSE
4401 // registers used and is in the range 0 - 8 inclusive.
4402
4403 // Count the number of XMM registers allocated.
4404 static const MCPhysReg XMMArgRegs[] = {
4405 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4406 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4407 };
4408 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4409 assert((Subtarget.hasSSE1() || !NumXMMRegs)
4410 && "SSE registers cannot be used when SSE is disabled");
4411 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4412 DAG.getConstant(NumXMMRegs, dl,
4413 MVT::i8)));
4414 }
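// (Example, per the ABI rule above: a variadic call that passes values in
// XMM0-XMM2 sets %al = 3; since %al only needs to be an upper bound, passing
// 8 would also be correct, just less precise.)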
4415
4416 if (isVarArg && IsMustTail) {
4417 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4418 for (const auto &F : Forwards) {
4419 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4420 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4421 }
4422 }
4423
4424 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4425 // don't need this because the eligibility check rejects calls that require
4426 // shuffling arguments passed in memory.
4427 if (!IsSibcall && isTailCall) {
4428 // Force all the incoming stack arguments to be loaded from the stack
4429 // before any new outgoing arguments are stored to the stack, because the
4430 // outgoing stack slots may alias the incoming argument stack slots, and
4431 // the alias isn't otherwise explicit. This is slightly more conservative
4432 // than necessary, because it means that each store effectively depends
4433 // on every argument instead of just those arguments it would clobber.
4434 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4435
4436 SmallVector<SDValue, 8> MemOpChains2;
4437 SDValue FIN;
4438 int FI = 0;
4439 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4440 ++I, ++OutsIndex) {
4441 CCValAssign &VA = ArgLocs[I];
4442
4443 if (VA.isRegLoc()) {
4444 if (VA.needsCustom()) {
4445 assert((CallConv == CallingConv::X86_RegCall) &&
4446 "Expecting custom case only in regcall calling convention");
4447 // This means that we are in the special case where one argument was
4448 // passed through two register locations - skip the next location.
4449 ++I;
4450 }
4451
4452 continue;
4453 }
4454
4455 assert(VA.isMemLoc());
4456 SDValue Arg = OutVals[OutsIndex];
4457 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4458 // Skip inalloca/preallocated arguments. They don't require any work.
4459 if (Flags.isInAlloca() || Flags.isPreallocated())
4460 continue;
4461 // Create frame index.
4462 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4463 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4464 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4465 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4466
4467 if (Flags.isByVal()) {
4468 // Copy relative to framepointer.
4469 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4470 if (!StackPtr.getNode())
4471 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4472 getPointerTy(DAG.getDataLayout()));
4473 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4474 StackPtr, Source);
4475
4476 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4477 ArgChain,
4478 Flags, DAG, dl));
4479 } else {
4480 // Store relative to framepointer.
4481 MemOpChains2.push_back(DAG.getStore(
4482 ArgChain, dl, Arg, FIN,
4483 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4484 }
4485 }
4486
4487 if (!MemOpChains2.empty())
4488 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4489
4490 // Store the return address to the appropriate stack slot.
4491 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4492 getPointerTy(DAG.getDataLayout()),
4493 RegInfo->getSlotSize(), FPDiff, dl);
4494 }
4495
4496 // Build a sequence of copy-to-reg nodes chained together with token chain
4497 // and flag operands which copy the outgoing args into registers.
4498 SDValue InFlag;
4499 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4500 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4501 RegsToPass[i].second, InFlag);
4502 InFlag = Chain.getValue(1);
4503 }
4504
4505 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4506 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
4507 // In the 64-bit large code model, we have to make all calls
4508 // through a register, since the call instruction's 32-bit
4509 // pc-relative offset may not be large enough to hold the whole
4510 // address.
4511 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4512 Callee->getOpcode() == ISD::ExternalSymbol) {
4513 // Lower direct calls to global addresses and external symbols. Setting
4514 // ForCall to true here has the effect of removing WrapperRIP when possible
4515 // to allow direct calls to be selected without first materializing the
4516 // address into a register.
4517 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4518 } else if (Subtarget.isTarget64BitILP32() &&
4519 Callee->getValueType(0) == MVT::i32) {
4520 // Zero-extend the 32-bit Callee address into a 64-bit one according to the x32 ABI
4521 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4522 }
4523
4524 // Returns a chain & a flag for retval copy to use.
4525 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4526 SmallVector<SDValue, 8> Ops;
4527
4528 if (!IsSibcall && isTailCall && !IsMustTail) {
4529 Chain = DAG.getCALLSEQ_END(Chain,
4530 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4531 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4532 InFlag = Chain.getValue(1);
4533 }
4534
4535 Ops.push_back(Chain);
4536 Ops.push_back(Callee);
4537
4538 if (isTailCall)
4539 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4540
4541 // Add argument registers to the end of the list so that they are known live
4542 // into the call.
4543 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4544 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4545 RegsToPass[i].second.getValueType()));
4546
4547 // Add a register mask operand representing the call-preserved registers.
4548 const uint32_t *Mask = [&]() {
4549 auto AdaptedCC = CallConv;
4550 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
4551 // use X86_INTR calling convention because it has the same CSR mask
4552 // (same preserved registers).
4553 if (HasNCSR)
4554 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
4555 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
4556 // to use the CSR_NoRegs_RegMask.
4557 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
4558 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
4559 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
4560 }();
4561 assert(Mask && "Missing call preserved mask for calling convention");
4562
4563 // If this is an invoke in a 32-bit function using a funclet-based
4564 // personality, assume the function clobbers all registers. If an exception
4565 // is thrown, the runtime will not restore CSRs.
4566 // FIXME: Model this more precisely so that we can register allocate across
4567 // the normal edge and spill and fill across the exceptional edge.
4568 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4569 const Function &CallerFn = MF.getFunction();
4570 EHPersonality Pers =
4571 CallerFn.hasPersonalityFn()
4572 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4573 : EHPersonality::Unknown;
4574 if (isFuncletEHPersonality(Pers))
4575 Mask = RegInfo->getNoPreservedMask();
4576 }
4577
4578 // Define a new register mask from the existing mask.
4579 uint32_t *RegMask = nullptr;
4580
4581 // In some calling conventions we need to remove the used physical registers
4582 // from the reg mask.
4583 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4584 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4585
4586 // Allocate a new Reg Mask and copy Mask.
4587 RegMask = MF.allocateRegMask();
4588 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4589 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4590
4591 // Make sure all sub registers of the argument registers are reset
4592 // in the RegMask.
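// (Note: a register mask is a packed bit vector with 32 physical registers per
// 32-bit word, where a set bit means "preserved"; clearing bit (Reg % 32) of
// word (Reg / 32) below marks Reg and its sub-registers as clobbered by this
// call.)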
4593 for (auto const &RegPair : RegsToPass)
4594 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4595 SubRegs.isValid(); ++SubRegs)
4596 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4597
4598 // Create the RegMask Operand according to our updated mask.
4599 Ops.push_back(DAG.getRegisterMask(RegMask));
4600 } else {
4601 // Create the RegMask Operand according to the static mask.
4602 Ops.push_back(DAG.getRegisterMask(Mask));
4603 }
4604
4605 if (InFlag.getNode())
4606 Ops.push_back(InFlag);
4607
4608 if (isTailCall) {
4609 // We used to do:
4610 //// If this is the first return lowered for this function, add the regs
4611 //// to the liveout set for the function.
4612 // This isn't right, although it's probably harmless on x86; liveouts
4613 // should be computed from returns not tail calls. Consider a void
4614 // function making a tail call to a function returning int.
4615 MF.getFrameInfo().setHasTailCall();
4616 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4617 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4618 return Ret;
4619 }
4620
4621 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4622 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4623 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4624 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
4625 // expanded to the call, directly followed by a special marker sequence and
4626 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
4627 assert(!isTailCall &&
4628 "tail calls cannot be marked with clang.arc.attachedcall");
4629 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
4630
4631 // Add a target constant to select the ObjC runtime call just before the call
4632 // target. RuntimeCallType == 0 selects objc_retainAutoreleasedReturnValue and
4633 // RuntimeCallType == 1 selects objc_unsafeClaimAutoreleasedReturnValue when
4634 // expanding the pseudo.
4635 unsigned RuntimeCallType =
4636 objcarc::hasAttachedCallOpBundle(CLI.CB, true) ? 0 : 1;
4637 Ops.insert(Ops.begin() + 1,
4638 DAG.getTargetConstant(RuntimeCallType, dl, MVT::i32));
4639 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
4640 } else {
4641 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4642 }
4643
4644 InFlag = Chain.getValue(1);
4645 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4646 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4647
4648 // Save heapallocsite metadata.
4649 if (CLI.CB)
4650 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4651 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4652
4653 // Create the CALLSEQ_END node.
4654 unsigned NumBytesForCalleeToPop;
4655 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4656 DAG.getTarget().Options.GuaranteedTailCallOpt))
4657 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4658 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4659 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4660 SR == StackStructReturn)
4661 // If this is a call to a struct-return function, the callee
4662 // pops the hidden struct pointer, so we have to push it back.
4663 // This is common for Darwin/X86, Linux & Mingw32 targets.
4664 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4665 NumBytesForCalleeToPop = 4;
4666 else
4667 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4668
4669 // Returns a flag for retval copy to use.
4670 if (!IsSibcall) {
4671 Chain = DAG.getCALLSEQ_END(Chain,
4672 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4673 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4674 true),
4675 InFlag, dl);
4676 InFlag = Chain.getValue(1);
4677 }
4678
4679 // Handle result values, copying them out of physregs into vregs that we
4680 // return.
4681 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4682 InVals, RegMask);
4683}
4684
4685//===----------------------------------------------------------------------===//
4686// Fast Calling Convention (tail call) implementation
4687//===----------------------------------------------------------------------===//
4688
4689 // Like the stdcall convention, the callee cleans up the arguments, except that
4690 // ECX is reserved for storing the tail-called function's address. Only 2
4691 // registers are free for argument passing (inreg). Tail call optimization is
4692 // performed provided:
4693 //  * tailcallopt is enabled
4694 //  * caller/callee are fastcc
4695 // On X86_64 architecture with GOT-style position independent code, only local
4696 // (within module) calls are supported at the moment.
4697 // To keep the stack aligned according to the platform ABI, the function
4698 // GetAlignedArgumentStackSize ensures that the argument delta is always a
4699 // multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld for example.)
4700 // If a tail-called callee has more arguments than the caller, the caller
4701 // needs to make sure that there is room to move the RETADDR to. This is
4702 // achieved by reserving an area the size of the argument delta right after the
4703 // original RETADDR, but before the saved frame pointer or the spilled registers,
4704 // e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
4705// stack layout:
4706// arg1
4707// arg2
4708// RETADDR
4709// [ new RETADDR
4710// move area ]
4711// (possible EBP)
4712// ESI
4713// EDI
4714// local1 ..
4715
4716/// Align the stack size, e.g. to 16n + 12, to satisfy a 16-byte alignment
4717/// requirement.
4718unsigned
4719X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4720 SelectionDAG &DAG) const {
4721 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4722 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4723  assert(StackSize % SlotSize == 0 &&
4724         "StackSize must be a multiple of SlotSize");
4725 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4726}
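As a worked illustration of the formula just above (an editorial sketch, not part of X86ISelLowering.cpp; it assumes a 16-byte stack alignment and a 4-byte return-address slot, and the helper name alignToSketch is hypothetical), the returned size is always of the form 16n + 12:

#include <cassert>
#include <cstdint>

// Round Value up to the next multiple of Align (Align must be a power of two).
static uint64_t alignToSketch(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  const uint64_t StackAlignment = 16; // assumed platform stack alignment
  const uint64_t SlotSize = 4;        // assumed 32-bit return-address slot
  for (uint64_t StackSize = 0; StackSize <= 64; StackSize += SlotSize) {
    // Mirrors: alignTo(StackSize + SlotSize, StackAlignment) - SlotSize
    uint64_t Aligned =
        alignToSketch(StackSize + SlotSize, StackAlignment) - SlotSize;
    assert(Aligned % StackAlignment == StackAlignment - SlotSize); // i.e. 16n + 12
    assert(Aligned >= StackSize);
  }
  return 0;
}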
4727
4728/// Return true if the given stack call argument is already available in the
4729/// same position (relatively) of the caller's incoming argument stack.
4730static
4731bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4732 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4733 const X86InstrInfo *TII, const CCValAssign &VA) {
4734 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4735
4736 for (;;) {
4737 // Look through nodes that don't alter the bits of the incoming value.
4738 unsigned Op = Arg.getOpcode();
4739 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4740 Arg = Arg.getOperand(0);
4741 continue;
4742 }
4743 if (Op == ISD::TRUNCATE) {
4744 const SDValue &TruncInput = Arg.getOperand(0);
4745 if (TruncInput.getOpcode() == ISD::AssertZext &&
4746 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4747 Arg.getValueType()) {
4748 Arg = TruncInput.getOperand(0);
4749 continue;
4750 }
4751 }
4752 break;
4753 }
4754
4755  int FI = INT_MAX;
4756 if (Arg.getOpcode() == ISD::CopyFromReg) {
4757 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4758 if (!VR.isVirtual())
4759 return false;
4760 MachineInstr *Def = MRI->getVRegDef(VR);
4761 if (!Def)
4762 return false;
4763 if (!Flags.isByVal()) {
4764 if (!TII->isLoadFromStackSlot(*Def, FI))
4765 return false;
4766 } else {
4767 unsigned Opcode = Def->getOpcode();
4768 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4769 Opcode == X86::LEA64_32r) &&
4770 Def->getOperand(1).isFI()) {
4771 FI = Def->getOperand(1).getIndex();
4772 Bytes = Flags.getByValSize();
4773 } else
4774 return false;
4775 }
4776 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4777 if (Flags.isByVal())
4778 // ByVal argument is passed in as a pointer but it's now being
4779 // dereferenced. e.g.
4780 // define @foo(%struct.X* %A) {
4781 // tail call @bar(%struct.X* byval %A)
4782 // }
4783 return false;
4784 SDValue Ptr = Ld->getBasePtr();
4785 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4786 if (!FINode)
4787 return false;
4788 FI = FINode->getIndex();
4789 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4790 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4791 FI = FINode->getIndex();
4792 Bytes = Flags.getByValSize();
4793 } else
4794 return false;
4795
4796  assert(FI != INT_MAX);
4797 if (!MFI.isFixedObjectIndex(FI))
4798 return false;
4799
4800 if (Offset != MFI.getObjectOffset(FI))
4801 return false;
4802
4803 // If this is not byval, check that the argument stack object is immutable.
4804 // inalloca and argument copy elision can create mutable argument stack
4805 // objects. Byval objects can be mutated, but a byval call intends to pass the
4806 // mutated memory.
4807 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4808 return false;
4809
4810 if (VA.getLocVT().getFixedSizeInBits() >
4811 Arg.getValueSizeInBits().getFixedSize()) {
4812 // If the argument location is wider than the argument type, check that any
4813 // extension flags match.
4814 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4815 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4816 return false;
4817 }
4818 }
4819
4820 return Bytes == MFI.getObjectSize(FI);
4821}
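The check above is easiest to see with a simplified model (an editorial sketch; FixedStackObjectSketch and matchesIncomingSlotSketch are hypothetical names, not LLVM API): an outgoing stack argument can be left in place for a tail call only if the caller's fixed incoming slot has the same offset and size and has not been made mutable.

#include <cstdint>

// Hypothetical, simplified model of a caller's fixed incoming-argument slot.
struct FixedStackObjectSketch {
  int64_t Offset;     // offset of the incoming argument slot
  uint64_t SizeBytes; // size of that slot
  bool Immutable;     // inalloca / argument-copy elision can make it mutable
};

// Core idea of MatchingStackOffset: the outgoing argument already lives at the
// right place, so no store is needed before the tail call.
static bool matchesIncomingSlotSketch(const FixedStackObjectSketch &Obj,
                                      int64_t OutgoingOffset,
                                      uint64_t OutgoingSizeBytes, bool IsByVal) {
  if (OutgoingOffset != Obj.Offset)
    return false;
  if (!IsByVal && !Obj.Immutable)
    return false; // the slot may have been overwritten in this function
  return OutgoingSizeBytes == Obj.SizeBytes;
}

int main() {
  FixedStackObjectSketch Slot{/*Offset=*/8, /*SizeBytes=*/4, /*Immutable=*/true};
  return matchesIncomingSlotSketch(Slot, 8, 4, /*IsByVal=*/false) ? 0 : 1;
}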
4822
4823/// Check whether the call is eligible for tail call optimization. Targets
4824/// that want to do tail call optimization should implement this function.
4825bool X86TargetLowering::IsEligibleForTailCallOptimization(
4826 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
4827 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
4828 const SmallVectorImpl<SDValue> &OutVals,
4829 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4830 if (!mayTailCallThisCC(CalleeCC))
4831 return false;
4832
4833 // If -tailcallopt is specified, make fastcc functions tail-callable.
4834 MachineFunction &MF = DAG.getMachineFunction();
4835 const Function &CallerF = MF.getFunction();
4836
4837 // If the function return type is x86_fp80 and the callee return type is not,
4838 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4839 // perform a tailcall optimization here.
4840 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4841 return false;
4842
4843 CallingConv::ID CallerCC = CallerF.getCallingConv();
4844 bool CCMatch = CallerCC == CalleeCC;
4845 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4846 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4847 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4848 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
4849
4850 // Win64 functions have extra shadow space for argument homing. Don't do the
4851 // sibcall if the caller and callee have mismatched expectations for this
4852 // space.
4853 if (IsCalleeWin64 != IsCallerWin64)
4854 return false;
4855
4856 if (IsGuaranteeTCO) {
4857 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4858 return true;
4859 return false;
4860 }
4861
4862 // Look for obvious safe cases to perform tail call optimization that do not
4863 // require ABI changes. This is what gcc calls sibcall.
4864
4865 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4866 // emit a special epilogue.
4867 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4868 if (RegInfo->hasStackRealignment(MF))
4869 return false;
4870
4871 // Also avoid sibcall optimization if we're an sret return fn and the callee
4872 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
4873 // insufficient.
4874 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
4875 // For a compatible tail call the callee must return our sret pointer. So it
4876 // needs to be (a) an sret function itself and (b) we pass our sret as its
4877 // sret. Condition #b is harder to determine.
4878 return false;
4879 } else if (Subtarget.is32Bit() && IsCalleeStackStructRet)
4880 // In the i686 ABI, the sret pointer is callee-pop, so we cannot tail-call,
4881 // as our caller doesn't expect that.
4882 return false;
4883
4884 // Do not sibcall optimize vararg calls unless all arguments are passed via
4885 // registers.
4886 LLVMContext &C = *DAG.getContext();
4887 if (isVarArg && !Outs.empty()) {
4888 // Optimizing for varargs on Win64 is unlikely to be safe without
4889 // additional testing.
4890 if (IsCalleeWin64 || IsCallerWin64)
4891 return false;
4892
4893 SmallVector<CCValAssign, 16> ArgLocs;
4894 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4895
4896 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4897 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4898 if (!ArgLocs[i].isRegLoc())
4899 return false;
4900 }
4901
4902 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4903 // stack. Therefore, if it's not used by the call it is not safe to optimize
4904 // this into a sibcall.
4905 bool Unused = false;
4906 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4907 if (!Ins[i].Used) {
4908 Unused = true;
4909 break;
4910 }
4911 }
4912 if (Unused) {
4913 SmallVector<CCValAssign, 16> RVLocs;
4914 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4915 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4916 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4917 CCValAssign &VA = RVLocs[i];
4918 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4919 return false;
4920 }
4921 }
4922
4923 // Check that the call results are passed in the same way.
4924 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4925 RetCC_X86, RetCC_X86))
4926 return false;
4927 // The callee has to preserve all registers the caller needs to preserve.
4928 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4929 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4930 if (!CCMatch) {
4931 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4932 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4933 return false;
4934 }
4935
4936 unsigned StackArgsSize = 0;
4937
4938 // If the callee takes no arguments then go on to check the results of the
4939 // call.
4940 if (!Outs.empty()) {
4941 // Check if stack adjustment is needed. For now, do not do this if any
4942 // argument is passed on the stack.
4943 SmallVector<CCValAssign, 16> ArgLocs;
4944 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4945
4946 // Allocate shadow area for Win64
4947 if (IsCalleeWin64)
4948 CCInfo.AllocateStack(32, Align(8));
4949
4950 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4951 StackArgsSize = CCInfo.getNextStackOffset();
4952
4953 if (CCInfo.getNextStackOffset()) {
4954 // Check if the arguments are already laid out in the right way as
4955 // the caller's fixed stack objects.
4956 MachineFrameInfo &MFI = MF.getFrameInfo();
4957 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4958 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4959 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4960 CCValAssign &VA = ArgLocs[i];
4961 SDValue Arg = OutVals[i];
4962 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4963 if (VA.getLocInfo() == CCValAssign::Indirect)
4964 return false;
4965 if (!VA.isRegLoc()) {
4966 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4967 MFI, MRI, TII, VA))
4968 return false;
4969 }
4970 }
4971 }
4972
4973 bool PositionIndependent = isPositionIndependent();
4974 // If the tailcall address may be in a register, then make sure it's
4975 // possible to register allocate for it. In 32-bit, the call address can
4976 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4977 // callee-saved registers are restored. These happen to be the same
4978 // registers used to pass 'inreg' arguments so watch out for those.
4979 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4980 !isa<ExternalSymbolSDNode>(Callee)) ||
4981 PositionIndependent)) {
4982 unsigned NumInRegs = 0;
4983 // In PIC we need an extra register to formulate the address computation
4984 // for the callee.
4985 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4986
4987 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4988 CCValAssign &VA = ArgLocs[i];
4989 if (!VA.isRegLoc())
4990 continue;
4991 Register Reg = VA.getLocReg();
4992 switch (Reg) {
4993 default: break;
4994 case X86::EAX: case X86::EDX: case X86::ECX:
4995 if (++NumInRegs == MaxInRegs)
4996 return false;
4997 break;
4998 }
4999 }
5000 }
5001
5002 const MachineRegisterInfo &MRI = MF.getRegInfo();
5003 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5004 return false;
5005 }
5006
5007 bool CalleeWillPop =
5008 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
5009 MF.getTarget().Options.GuaranteedTailCallOpt);
5010
5011 if (unsigned BytesToPop =
5012 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
5013 // If we have bytes to pop, the callee must pop them.
5014 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
5015 if (!CalleePopMatches)
5016 return false;
5017 } else if (CalleeWillPop && StackArgsSize > 0) {
5018 // If we don't have bytes to pop, make sure the callee doesn't pop any.
5019 return false;
5020 }
5021
5022 return true;
5023}
5024
5025FastISel *
5026X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
5027 const TargetLibraryInfo *libInfo) const {
5028 return X86::createFastISel(funcInfo, libInfo);
5029}
5030
5031//===----------------------------------------------------------------------===//
5032// Other Lowering Hooks
5033//===----------------------------------------------------------------------===//
5034
5035static bool MayFoldLoad(SDValue Op, bool AssumeSingleUse = false) {
5036 return (AssumeSingleUse || Op.hasOneUse()) && ISD::isNormalLoad(Op.getNode());
5037}
5038
5039static bool MayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
5040 bool AssumeSingleUse = false) {
5041 if (!MayFoldLoad(Op, AssumeSingleUse))
5042 return false;
5043
5044  // We cannot replace a wide volatile load with a broadcast-from-memory,
5045 // because that would narrow the load, which isn't legal for volatiles.
5046 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op.getNode());
5047 return !Ld->isVolatile() ||
5048 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
5049}
5050
5051static bool MayFoldIntoStore(SDValue Op) {
5052 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
5053}
5054
5055static bool MayFoldIntoZeroExtend(SDValue Op) {
5056 if (Op.hasOneUse()) {
5057 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
5058 return (ISD::ZERO_EXTEND == Opcode);
5059 }
5060 return false;
5061}
5062
5063static bool isTargetShuffle(unsigned Opcode) {
5064 switch(Opcode) {
5065 default: return false;
5066 case X86ISD::BLENDI:
5067 case X86ISD::PSHUFB:
5068 case X86ISD::PSHUFD:
5069 case X86ISD::PSHUFHW:
5070 case X86ISD::PSHUFLW:
5071 case X86ISD::SHUFP:
5072 case X86ISD::INSERTPS:
5073 case X86ISD::EXTRQI:
5074 case X86ISD::INSERTQI:
5075 case X86ISD::VALIGN:
5076 case X86ISD::PALIGNR:
5077 case X86ISD::VSHLDQ:
5078 case X86ISD::VSRLDQ:
5079 case X86ISD::MOVLHPS:
5080 case X86ISD::MOVHLPS:
5081 case X86ISD::MOVSHDUP:
5082 case X86ISD::MOVSLDUP:
5083 case X86ISD::MOVDDUP:
5084 case X86ISD::MOVSS:
5085 case X86ISD::MOVSD:
5086 case X86ISD::MOVSH:
5087 case X86ISD::UNPCKL:
5088 case X86ISD::UNPCKH:
5089 case X86ISD::VBROADCAST:
5090 case X86ISD::VPERMILPI:
5091 case X86ISD::VPERMILPV:
5092 case X86ISD::VPERM2X128:
5093 case X86ISD::SHUF128:
5094 case X86ISD::VPERMIL2:
5095 case X86ISD::VPERMI:
5096 case X86ISD::VPPERM:
5097 case X86ISD::VPERMV:
5098 case X86ISD::VPERMV3:
5099 case X86ISD::VZEXT_MOVL:
5100 return true;
5101 }
5102}
5103
5104static bool isTargetShuffleVariableMask(unsigned Opcode) {
5105 switch (Opcode) {
5106 default: return false;
5107 // Target Shuffles.
5108 case X86ISD::PSHUFB:
5109 case X86ISD::VPERMILPV:
5110 case X86ISD::VPERMIL2:
5111 case X86ISD::VPPERM:
5112 case X86ISD::VPERMV:
5113 case X86ISD::VPERMV3:
5114 return true;
5115 // 'Faux' Target Shuffles.
5116 case ISD::OR:
5117 case ISD::AND:
5118 case X86ISD::ANDNP:
5119 return true;
5120 }
5121}
5122
5123static bool isTargetShuffleSplat(SDValue Op) {
5124 unsigned Opcode = Op.getOpcode();
5125 if (Opcode == ISD::EXTRACT_SUBVECTOR)
5126 return isTargetShuffleSplat(Op.getOperand(0));
5127 return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
5128}
5129
5130SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
5131 MachineFunction &MF = DAG.getMachineFunction();
5132 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
5133 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
5134 int ReturnAddrIndex = FuncInfo->getRAIndex();
5135
5136 if (ReturnAddrIndex == 0) {
5137 // Set up a frame object for the return address.
5138 unsigned SlotSize = RegInfo->getSlotSize();
5139 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
5140 -(int64_t)SlotSize,
5141 false);
5142 FuncInfo->setRAIndex(ReturnAddrIndex);
5143 }
5144
5145 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
5146}
5147
5148bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
5149 bool hasSymbolicDisplacement) {
5150 // Offset should fit into 32 bit immediate field.
5151 if (!isInt<32>(Offset))
5152 return false;
5153
5154 // If we don't have a symbolic displacement - we don't have any extra
5155 // restrictions.
5156 if (!hasSymbolicDisplacement)
5157 return true;
5158
5159 // FIXME: Some tweaks might be needed for medium code model.
5160 if (M != CodeModel::Small && M != CodeModel::Kernel)
5161 return false;
5162
5163  // For the small code model we assume that the last object lies 16MB before the
5164  // end of the 31-bit boundary. We may also accept fairly large negative
5165  // constants, knowing that all objects are in the positive half of the address space.
5166 if (M == CodeModel::Small && Offset < 16*1024*1024)
5167 return true;
5168
5169  // For the kernel code model we know that all objects reside in the negative
5170  // half of the 32-bit address space. We must not accept negative offsets, since
5171  // they may fall out of that range, but we may accept fairly large positive ones.
5172 if (M == CodeModel::Kernel && Offset >= 0)
5173 return true;
5174
5175 return false;
5176}
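A standalone restatement of the symbolic-displacement cases above (an editorial sketch with hypothetical names; it only models the small and kernel code model checks, not the full API):

#include <cstdint>
#include <limits>

enum class CodeModelSketch { Small, Kernel, Other };

static bool offsetSuitableSketch(int64_t Offset, CodeModelSketch M) {
  if (Offset < std::numeric_limits<int32_t>::min() ||
      Offset > std::numeric_limits<int32_t>::max())
    return false;                     // must fit a 32-bit immediate field
  if (M == CodeModelSketch::Small)
    return Offset < 16 * 1024 * 1024; // objects end at least 16MB below 2^31
  if (M == CodeModelSketch::Kernel)
    return Offset >= 0;               // objects live in the negative half
  return false;
}

int main() {
  bool OK = offsetSuitableSketch(1 << 20, CodeModelSketch::Small) &&    // accepted
            offsetSuitableSketch(-(1 << 20), CodeModelSketch::Small) && // accepted
            !offsetSuitableSketch(-1, CodeModelSketch::Kernel);         // rejected
  return OK ? 0 : 1;
}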
5177
5178/// Determines whether the callee is required to pop its own arguments.
5179/// Callee pop is necessary to support tail calls.
5180bool X86::isCalleePop(CallingConv::ID CallingConv,
5181 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
5182 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
5183 // can guarantee TCO.
5184 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
5185 return true;
5186
5187 switch (CallingConv) {
5188 default:
5189 return false;
5190 case CallingConv::X86_StdCall:
5191 case CallingConv::X86_FastCall:
5192 case CallingConv::X86_ThisCall:
5193 case CallingConv::X86_VectorCall:
5194 return !is64Bit;
5195 }
5196}
5197
5198/// Return true if the condition is a signed comparison operation.
5199static bool isX86CCSigned(unsigned X86CC) {
5200 switch (X86CC) {
5201 default:
5202    llvm_unreachable("Invalid integer condition!");
5203 case X86::COND_E:
5204 case X86::COND_NE:
5205 case X86::COND_B:
5206 case X86::COND_A:
5207 case X86::COND_BE:
5208 case X86::COND_AE:
5209 return false;
5210 case X86::COND_G:
5211 case X86::COND_GE:
5212 case X86::COND_L:
5213 case X86::COND_LE:
5214 return true;
5215 }
5216}
5217
5218static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
5219 switch (SetCCOpcode) {
5220  default: llvm_unreachable("Invalid integer condition!");
5221 case ISD::SETEQ: return X86::COND_E;
5222 case ISD::SETGT: return X86::COND_G;
5223 case ISD::SETGE: return X86::COND_GE;
5224 case ISD::SETLT: return X86::COND_L;
5225 case ISD::SETLE: return X86::COND_LE;
5226 case ISD::SETNE: return X86::COND_NE;
5227 case ISD::SETULT: return X86::COND_B;
5228 case ISD::SETUGT: return X86::COND_A;
5229 case ISD::SETULE: return X86::COND_BE;
5230 case ISD::SETUGE: return X86::COND_AE;
5231 }
5232}
5233
5234/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
5235/// condition code, returning the condition code and the LHS/RHS of the
5236/// comparison to make.
5237static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
5238 bool isFP, SDValue &LHS, SDValue &RHS,
5239 SelectionDAG &DAG) {
5240 if (!isFP) {
5241 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5242 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
5243 // X > -1 -> X == 0, jump !sign.
5244 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5245 return X86::COND_NS;
5246 }
5247 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
5248 // X < 0 -> X == 0, jump on sign.
5249 return X86::COND_S;
5250 }
5251 if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
5252 // X >= 0 -> X == 0, jump on !sign.
5253 return X86::COND_NS;
5254 }
5255 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5256 // X < 1 -> X <= 0
5257 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5258 return X86::COND_LE;
5259 }
5260 }
5261
5262 return TranslateIntegerX86CC(SetCCOpcode);
5263 }
5264
5265 // First determine if it is required or is profitable to flip the operands.
5266
5267 // If LHS is a foldable load, but RHS is not, flip the condition.
5268 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5269 !ISD::isNON_EXTLoad(RHS.getNode())) {
5270 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5271 std::swap(LHS, RHS);
5272 }
5273
5274 switch (SetCCOpcode) {
5275 default: break;
5276 case ISD::SETOLT:
5277 case ISD::SETOLE:
5278 case ISD::SETUGT:
5279 case ISD::SETUGE:
5280 std::swap(LHS, RHS);
5281 break;
5282 }
5283
5284 // On a floating point condition, the flags are set as follows:
5285 // ZF PF CF op
5286 // 0 | 0 | 0 | X > Y
5287 // 0 | 0 | 1 | X < Y
5288 // 1 | 0 | 0 | X == Y
5289 // 1 | 1 | 1 | unordered
5290 switch (SetCCOpcode) {
5291  default: llvm_unreachable("Condcode should be pre-legalized away");
5292 case ISD::SETUEQ:
5293 case ISD::SETEQ: return X86::COND_E;
5294 case ISD::SETOLT: // flipped
5295 case ISD::SETOGT:
5296 case ISD::SETGT: return X86::COND_A;
5297 case ISD::SETOLE: // flipped
5298 case ISD::SETOGE:
5299 case ISD::SETGE: return X86::COND_AE;
5300 case ISD::SETUGT: // flipped
5301 case ISD::SETULT:
5302 case ISD::SETLT: return X86::COND_B;
5303 case ISD::SETUGE: // flipped
5304 case ISD::SETULE:
5305 case ISD::SETLE: return X86::COND_BE;
5306 case ISD::SETONE:
5307 case ISD::SETNE: return X86::COND_NE;
5308 case ISD::SETUO: return X86::COND_P;
5309 case ISD::SETO: return X86::COND_NP;
5310 case ISD::SETOEQ:
5311 case ISD::SETUNE: return X86::COND_INVALID;
5312 }
5313}
5314
5315/// Is there a floating point cmov for the specific X86 condition code?
5316/// The current x86 ISA includes the following FP cmov instructions:
5317/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5318static bool hasFPCMov(unsigned X86CC) {
5319 switch (X86CC) {
5320 default:
5321 return false;
5322 case X86::COND_B:
5323 case X86::COND_BE:
5324 case X86::COND_E:
5325 case X86::COND_P:
5326 case X86::COND_A:
5327 case X86::COND_AE:
5328 case X86::COND_NE:
5329 case X86::COND_NP:
5330 return true;
5331 }
5332}
5333
5334
5335bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5336 const CallInst &I,
5337 MachineFunction &MF,
5338 unsigned Intrinsic) const {
5339 Info.flags = MachineMemOperand::MONone;
5340 Info.offset = 0;
5341
5342 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5343 if (!IntrData) {
5344 switch (Intrinsic) {
5345 case Intrinsic::x86_aesenc128kl:
5346 case Intrinsic::x86_aesdec128kl:
5347 Info.opc = ISD::INTRINSIC_W_CHAIN;
5348 Info.ptrVal = I.getArgOperand(1);
5349 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5350 Info.align = Align(1);
5351 Info.flags |= MachineMemOperand::MOLoad;
5352 return true;
5353 case Intrinsic::x86_aesenc256kl:
5354 case Intrinsic::x86_aesdec256kl:
5355 Info.opc = ISD::INTRINSIC_W_CHAIN;
5356 Info.ptrVal = I.getArgOperand(1);
5357 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5358 Info.align = Align(1);
5359 Info.flags |= MachineMemOperand::MOLoad;
5360 return true;
5361 case Intrinsic::x86_aesencwide128kl:
5362 case Intrinsic::x86_aesdecwide128kl:
5363 Info.opc = ISD::INTRINSIC_W_CHAIN;
5364 Info.ptrVal = I.getArgOperand(0);
5365 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5366 Info.align = Align(1);
5367 Info.flags |= MachineMemOperand::MOLoad;
5368 return true;
5369 case Intrinsic::x86_aesencwide256kl:
5370 case Intrinsic::x86_aesdecwide256kl:
5371 Info.opc = ISD::INTRINSIC_W_CHAIN;
5372 Info.ptrVal = I.getArgOperand(0);
5373 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5374 Info.align = Align(1);
5375 Info.flags |= MachineMemOperand::MOLoad;
5376 return true;
5377 }
5378 return false;
5379 }
5380
5381 switch (IntrData->Type) {
5382 case TRUNCATE_TO_MEM_VI8:
5383 case TRUNCATE_TO_MEM_VI16:
5384 case TRUNCATE_TO_MEM_VI32: {
5385 Info.opc = ISD::INTRINSIC_VOID;
5386 Info.ptrVal = I.getArgOperand(0);
5387 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5388 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5389 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5390 ScalarVT = MVT::i8;
5391 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5392 ScalarVT = MVT::i16;
5393 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5394 ScalarVT = MVT::i32;
5395
5396 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5397 Info.align = Align(1);
5398 Info.flags |= MachineMemOperand::MOStore;
5399 break;
5400 }
5401 case GATHER:
5402 case GATHER_AVX2: {
5403 Info.opc = ISD::INTRINSIC_W_CHAIN;
5404 Info.ptrVal = nullptr;
5405 MVT DataVT = MVT::getVT(I.getType());
5406 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5407 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5408 IndexVT.getVectorNumElements());
5409 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5410 Info.align = Align(1);
5411 Info.flags |= MachineMemOperand::MOLoad;
5412 break;
5413 }
5414 case SCATTER: {
5415 Info.opc = ISD::INTRINSIC_VOID;
5416 Info.ptrVal = nullptr;
5417 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5418 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5419 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5420 IndexVT.getVectorNumElements());
5421 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5422 Info.align = Align(1);
5423 Info.flags |= MachineMemOperand::MOStore;
5424 break;
5425 }
5426 default:
5427 return false;
5428 }
5429
5430 return true;
5431}
5432
5433/// Returns true if the target can instruction select the
5434/// specified FP immediate natively. If false, the legalizer will
5435/// materialize the FP immediate as a load from a constant pool.
5436bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5437 bool ForCodeSize) const {
5438 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
5439 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
5440 return true;
5441 }
5442 return false;
5443}
5444
5445bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5446 ISD::LoadExtType ExtTy,
5447 EVT NewVT) const {
5448  assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
5449
5450  // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5451  // relocations target a movq or addq instruction: don't let the load shrink.
5452 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5453 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5454 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5455 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5456
5457 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5458 // those uses are extracted directly into a store, then the extract + store
5459 // can be store-folded. Therefore, it's probably not worth splitting the load.
5460 EVT VT = Load->getValueType(0);
5461 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5462 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5463 // Skip uses of the chain value. Result 0 of the node is the load value.
5464 if (UI.getUse().getResNo() != 0)
5465 continue;
5466
5467 // If this use is not an extract + store, it's probably worth splitting.
5468 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5469 UI->use_begin()->getOpcode() != ISD::STORE)
5470 return true;
5471 }
5472 // All non-chain uses are extract + store.
5473 return false;
5474 }
5475
5476 return true;
5477}
5478
5479/// Returns true if it is beneficial to convert a load of a constant
5480/// to just the constant itself.
5481bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5482 Type *Ty) const {
5483  assert(Ty->isIntegerTy());
5484
5485 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5486 if (BitSize == 0 || BitSize > 64)
5487 return false;
5488 return true;
5489}
5490
5491bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5492 // If we are using XMM registers in the ABI and the condition of the select is
5493 // a floating-point compare and we have blendv or conditional move, then it is
5494 // cheaper to select instead of doing a cross-register move and creating a
5495 // load that depends on the compare result.
5496 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5497 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5498}
5499
5500bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5501 // TODO: It might be a win to ease or lift this restriction, but the generic
5502 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5503 if (VT.isVector() && Subtarget.hasAVX512())
5504 return false;
5505
5506 return true;
5507}
5508
5509bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5510 SDValue C) const {
5511 // TODO: We handle scalars using custom code, but generic combining could make
5512 // that unnecessary.
5513 APInt MulC;
5514 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5515 return false;
5516
5517  // Find the type this will be legalized to. Otherwise we might prematurely
5518 // convert this to shl+add/sub and then still have to type legalize those ops.
5519 // Another choice would be to defer the decision for illegal types until
5520 // after type legalization. But constant splat vectors of i64 can't make it
5521 // through type legalization on 32-bit targets so we would need to special
5522 // case vXi64.
5523 while (getTypeAction(Context, VT) != TypeLegal)
5524 VT = getTypeToTransformTo(Context, VT);
5525
5526 // If vector multiply is legal, assume that's faster than shl + add/sub.
5527 // TODO: Multiply is a complex op with higher latency and lower throughput in
5528 // most implementations, so this check could be loosened based on type
5529 // and/or a CPU attribute.
5530 if (isOperationLegal(ISD::MUL, VT))
5531 return false;
5532
5533 // shl+add, shl+sub, shl+add+neg
5534 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5535 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5536}
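The final check above corresponds to four shl+add/sub shapes; a small self-contained verification of them for example constants (editorial sketch, positive x only to keep the signed shifts well-defined):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x = 0; x <= 100; ++x) {
    assert(x * 9 == (x << 3) + x);     // MulC - 1 is a power of 2
    assert(x * 7 == (x << 3) - x);     // MulC + 1 is a power of 2
    assert(x * -3 == x - (x << 2));    // 1 - MulC is a power of 2
    assert(x * -9 == -((x << 3) + x)); // -(MulC + 1) is a power of 2
  }
  return 0;
}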
5537
5538bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5539 unsigned Index) const {
5540 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5541 return false;
5542
5543 // Mask vectors support all subregister combinations and operations that
5544 // extract half of vector.
5545 if (ResVT.getVectorElementType() == MVT::i1)
5546 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5547 (Index == ResVT.getVectorNumElements()));
5548
5549 return (Index % ResVT.getVectorNumElements()) == 0;
5550}
5551
5552bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5553 unsigned Opc = VecOp.getOpcode();
5554
5555 // Assume target opcodes can't be scalarized.
5556 // TODO - do we have any exceptions?
5557 if (Opc >= ISD::BUILTIN_OP_END)
5558 return false;
5559
5560 // If the vector op is not supported, try to convert to scalar.
5561 EVT VecVT = VecOp.getValueType();
5562 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5563 return true;
5564
5565 // If the vector op is supported, but the scalar op is not, the transform may
5566 // not be worthwhile.
5567 EVT ScalarVT = VecVT.getScalarType();
5568 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5569}
5570
5571bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5572 bool) const {
5573 // TODO: Allow vectors?
5574 if (VT.isVector())
5575 return false;
5576 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5577}
5578
5579bool X86TargetLowering::isCheapToSpeculateCttz() const {
5580 // Speculate cttz only if we can directly use TZCNT.
5581 return Subtarget.hasBMI();
5582}
5583
5584bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5585 // Speculate ctlz only if we can directly use LZCNT.
5586 return Subtarget.hasLZCNT();
5587}
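The reason speculation needs TZCNT/LZCNT rather than BSF/BSR is that the legacy instructions leave the result undefined for a zero input, so a zero-guard branch (or cmov) is otherwise required. A small sketch using the GCC/Clang builtins (editorial example, not LLVM code):

#include <cstdint>

// Safe for x == 0. With BMI/LZCNT the compiler can drop the branch entirely,
// because TZCNT/LZCNT themselves return 32 for a zero input.
static unsigned cttz32(uint32_t x) {
  return x ? static_cast<unsigned>(__builtin_ctz(x)) : 32u;
}
static unsigned ctlz32(uint32_t x) {
  return x ? static_cast<unsigned>(__builtin_clz(x)) : 32u;
}

int main() {
  return (cttz32(0) == 32 && cttz32(8) == 3 &&
          ctlz32(0) == 32 && ctlz32(1) == 31) ? 0 : 1;
}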
5588
5589bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5590 const SelectionDAG &DAG,
5591 const MachineMemOperand &MMO) const {
5592 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5593 BitcastVT.getVectorElementType() == MVT::i1)
5594 return false;
5595
5596 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5597 return false;
5598
5599 // If both types are legal vectors, it's always ok to convert them.
5600 if (LoadVT.isVector() && BitcastVT.isVector() &&
5601 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5602 return true;
5603
5604 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5605}
5606
5607bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5608 const MachineFunction &MF) const {
5609  // Do not merge to float value size (128 bits) if no implicit
5610 // float attribute is set.
5611 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
5612
5613 if (NoFloat) {
5614 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5615 return (MemVT.getSizeInBits() <= MaxIntSize);
5616 }
5617 // Make sure we don't merge greater than our preferred vector
5618 // width.
5619 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5620 return false;
5621
5622 return true;
5623}
5624
5625bool X86TargetLowering::isCtlzFast() const {
5626 return Subtarget.hasFastLZCNT();
5627}
5628
5629bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5630 const Instruction &AndI) const {
5631 return true;
5632}
5633
5634bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5635 EVT VT = Y.getValueType();
5636
5637 if (VT.isVector())
5638 return false;
5639
5640 if (!Subtarget.hasBMI())
5641 return false;
5642
5643 // There are only 32-bit and 64-bit forms for 'andn'.
5644 if (VT != MVT::i32 && VT != MVT::i64)
5645 return false;
5646
5647 return !isa<ConstantSDNode>(Y);
5648}
5649
5650bool X86TargetLowering::hasAndNot(SDValue Y) const {
5651 EVT VT = Y.getValueType();
5652
5653 if (!VT.isVector())
5654 return hasAndNotCompare(Y);
5655
5656 // Vector.
5657
5658 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5659 return false;
5660
5661 if (VT == MVT::v4i32)
5662 return true;
5663
5664 return Subtarget.hasSSE2();
5665}
5666
5667bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5668 return X.getValueType().isScalarInteger(); // 'bt'
5669}
5670
5671bool X86TargetLowering::
5672 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5673 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5674 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5675 SelectionDAG &DAG) const {
5676 // Does baseline recommend not to perform the fold by default?
5677 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5678 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5679 return false;
5680 // For scalars this transform is always beneficial.
5681 if (X.getValueType().isScalarInteger())
5682 return true;
5683 // If all the shift amounts are identical, then transform is beneficial even
5684 // with rudimentary SSE2 shifts.
5685 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5686 return true;
5687  // If we have AVX2 with its powerful shift operations, then it's also good.
5688 if (Subtarget.hasAVX2())
5689 return true;
5690 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5691 return NewShiftOpcode == ISD::SHL;
5692}
5693
5694bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5695 const SDNode *N, CombineLevel Level) const {
5696  assert(((N->getOpcode() == ISD::SHL &&
5697           N->getOperand(0).getOpcode() == ISD::SRL) ||
5698          (N->getOpcode() == ISD::SRL &&
5699           N->getOperand(0).getOpcode() == ISD::SHL)) &&
5700         "Expected shift-shift mask");
5701 EVT VT = N->getValueType(0);
5702 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5703 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5704 // Only fold if the shift values are equal - so it folds to AND.
5705 // TODO - we should fold if either is a non-uniform vector but we don't do
5706 // the fold for non-splats yet.
5707 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5708 }
5709 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5710}
5711
5712bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5713 EVT VT = Y.getValueType();
5714
5715 // For vectors, we don't have a preference, but we probably want a mask.
5716 if (VT.isVector())
5717 return false;
5718
5719 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5720 if (VT == MVT::i64 && !Subtarget.is64Bit())
5721 return false;
5722
5723 return true;
5724}
5725
5726bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5727 SDNode *N) const {
5728 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5729 !Subtarget.isOSWindows())
5730 return false;
5731 return true;
5732}
5733
5734bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5735 // Any legal vector type can be splatted more efficiently than
5736 // loading/spilling from memory.
5737 return isTypeLegal(VT);
5738}
5739
5740MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5741 MVT VT = MVT::getIntegerVT(NumBits);
5742 if (isTypeLegal(VT))
5743 return VT;
5744
5745 // PMOVMSKB can handle this.
5746 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5747 return MVT::v16i8;
5748
5749 // VPMOVMSKB can handle this.
5750 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5751 return MVT::v32i8;
5752
5753 // TODO: Allow 64-bit type for 32-bit target.
5754 // TODO: 512-bit types should be allowed, but make sure that those
5755 // cases are handled in combineVectorSizedSetCCEquality().
5756
5757 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5758}
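The v16i8 / v32i8 choices above correspond to the PCMPEQB + PMOVMSKB idiom for wide equality compares. A standalone SSE2-intrinsics sketch of that idiom (editorial example showing the underlying idea, not the DAG lowering itself):

#include <emmintrin.h> // SSE2

// Compare two 16-byte buffers with one PCMPEQB + PMOVMSKB instead of two
// 64-bit scalar compares.
static bool equal16(const void *A, const void *B) {
  __m128i VA = _mm_loadu_si128(static_cast<const __m128i *>(A));
  __m128i VB = _mm_loadu_si128(static_cast<const __m128i *>(B));
  __m128i Eq = _mm_cmpeq_epi8(VA, VB);    // 0xFF in every lane where bytes match
  return _mm_movemask_epi8(Eq) == 0xFFFF; // all 16 lanes matched
}

int main() {
  char X[16] = "0123456789abcde";
  char Y[16] = "0123456789abcde";
  char Z[16] = "0123456789abcdX";
  return (equal16(X, Y) && !equal16(X, Z)) ? 0 : 1;
}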
5759
5760/// Val is the undef sentinel value or equal to the specified value.
5761static bool isUndefOrEqual(int Val, int CmpVal) {
5762 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5763}
5764
5765/// Return true if every element in Mask is the undef sentinel value or equal to
5766/// the specified value.
5767static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
5768 return llvm::all_of(Mask, [CmpVal](int M) {
5769 return (M == SM_SentinelUndef) || (M == CmpVal);
5770 });
5771}
5772
5773/// Val is either the undef or zero sentinel value.
5774static bool isUndefOrZero(int Val) {
5775 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5776}
5777
5778/// Return true if every element in Mask, beginning from position Pos and ending
5779/// in Pos+Size is the undef sentinel value.
5780static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5781 return llvm::all_of(Mask.slice(Pos, Size),
5782 [](int M) { return M == SM_SentinelUndef; });
5783}
5784
5785/// Return true if the mask creates a vector whose lower half is undefined.
5786static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5787 unsigned NumElts = Mask.size();
5788 return isUndefInRange(Mask, 0, NumElts / 2);
5789}
5790
5791/// Return true if the mask creates a vector whose upper half is undefined.
5792static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5793 unsigned NumElts = Mask.size();
5794 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5795}
5796
5797/// Return true if Val falls within the specified half-open range [Low, Hi).
5798static bool isInRange(int Val, int Low, int Hi) {
5799 return (Val >= Low && Val < Hi);
5800}
5801
5802/// Return true if the value of any element in Mask falls within the specified
5803/// range [Low, Hi).
5804static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5805 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5806}
5807
5808/// Return true if the value of any element in Mask is the zero sentinel value.
5809static bool isAnyZero(ArrayRef<int> Mask) {
5810 return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
5811}
5812
5813/// Return true if the value of any element in Mask is the zero or undef
5814/// sentinel values.
5815static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
5816 return llvm::any_of(Mask, [](int M) {
5817 return M == SM_SentinelZero || M == SM_SentinelUndef;
5818 });
5819}
5820
5821/// Return true if Val is undef or if its value falls within the
5822/// specified range [Low, Hi).
5823static bool isUndefOrInRange(int Val, int Low, int Hi) {
5824 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5825}
5826
5827/// Return true if every element in Mask is undef or if its value
5828/// falls within the specified range [Low, Hi).
5829static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5830 return llvm::all_of(
5831 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5832}
5833
5834/// Return true if Val is undef, zero or if its value falls within the
5835/// specified range [Low, Hi).
5836static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5837 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5838}
5839
5840/// Return true if every element in Mask is undef, zero or if its value
5841/// falls within the specified range [Low, Hi).
5842static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5843 return llvm::all_of(
5844 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5845}
5846
5847/// Return true if every element in Mask, beginning
5848/// from position Pos and ending in Pos + Size, falls within the specified
5849/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5850static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5851 unsigned Size, int Low, int Step = 1) {
5852 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5853 if (!isUndefOrEqual(Mask[i], Low))
5854 return false;
5855 return true;
5856}
5857
5858/// Return true if every element in Mask, beginning
5859/// from position Pos and ending in Pos+Size, falls within the specified
5860/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step), or is undef or zero.
5861static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5862 unsigned Size, int Low,
5863 int Step = 1) {
5864 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5865 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5866 return false;
5867 return true;
5868}
5869
5870/// Return true if every element in Mask, beginning
5871/// from position Pos and ending in Pos+Size is undef or is zero.
5872static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5873 unsigned Size) {
5874 return llvm::all_of(Mask.slice(Pos, Size),
5875 [](int M) { return isUndefOrZero(M); });
5876}
5877
5878/// Helper function to test whether a shuffle mask could be
5879/// simplified by widening the elements being shuffled.
5880///
5881/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5882/// leaves it in an unspecified state.
5883///
5884/// NOTE: This must handle normal vector shuffle masks and *target* vector
5885/// shuffle masks. The latter have the special property of a '-2' representing
5886/// a zeroed lane of a vector.
5887static bool canWidenShuffleElements(ArrayRef<int> Mask,
5888 SmallVectorImpl<int> &WidenedMask) {
5889 WidenedMask.assign(Mask.size() / 2, 0);
5890 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5891 int M0 = Mask[i];
5892 int M1 = Mask[i + 1];
5893
5894    // If both elements are undef, it's trivial.
5895 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5896 WidenedMask[i / 2] = SM_SentinelUndef;
5897 continue;
5898 }
5899
5900 // Check for an undef mask and a mask value properly aligned to fit with
5901 // a pair of values. If we find such a case, use the non-undef mask's value.
5902 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {