Bug Summary

File: llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 36476, column 5
Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem 
/usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2021-01-26-035717-31997-1 -x c++ /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp

/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/IntrinsicLowering.h"
34#include "llvm/CodeGen/MachineFrameInfo.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineJumpTableInfo.h"
38#include "llvm/CodeGen/MachineLoopInfo.h"
39#include "llvm/CodeGen/MachineModuleInfo.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/TargetLowering.h"
42#include "llvm/CodeGen/WinEHFuncInfo.h"
43#include "llvm/IR/CallingConv.h"
44#include "llvm/IR/Constants.h"
45#include "llvm/IR/DerivedTypes.h"
46#include "llvm/IR/DiagnosticInfo.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalAlias.h"
49#include "llvm/IR/GlobalVariable.h"
50#include "llvm/IR/Instructions.h"
51#include "llvm/IR/Intrinsics.h"
52#include "llvm/MC/MCAsmInfo.h"
53#include "llvm/MC/MCContext.h"
54#include "llvm/MC/MCExpr.h"
55#include "llvm/MC/MCSymbol.h"
56#include "llvm/Support/CommandLine.h"
57#include "llvm/Support/Debug.h"
58#include "llvm/Support/ErrorHandling.h"
59#include "llvm/Support/KnownBits.h"
60#include "llvm/Support/MathExtras.h"
61#include "llvm/Target/TargetOptions.h"
62#include <algorithm>
63#include <bitset>
64#include <cctype>
65#include <numeric>
66using namespace llvm;
67
68#define DEBUG_TYPE"x86-isel" "x86-isel"
69
70STATISTIC(NumTailCalls, "Number of tail calls")static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls"
, "Number of tail calls"}
;
71
72static cl::opt<int> ExperimentalPrefLoopAlignment(
73 "x86-experimental-pref-loop-alignment", cl::init(4),
74 cl::desc(
75 "Sets the preferable loop alignment for experiments (as log2 bytes)"
76 "(the last x86-experimental-pref-loop-alignment bits"
77 " of the loop header PC will be 0)."),
78 cl::Hidden);
79
80static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
81 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
82 cl::desc(
83 "Sets the preferable loop alignment for experiments (as log2 bytes) "
84 "for innermost loops only. If specified, this option overrides "
85 "alignment set by x86-experimental-pref-loop-alignment."),
86 cl::Hidden);
87
88static cl::opt<bool> MulConstantOptimization(
89 "mul-constant-optimization", cl::init(true),
90 cl::desc("Replace 'mul x, Const' with more effective instructions like "
91 "SHIFT, LEA, etc."),
92 cl::Hidden);
93
94static cl::opt<bool> ExperimentalUnorderedISEL(
95 "x86-experimental-unordered-atomic-isel", cl::init(false),
96 cl::desc("Use LoadSDNode and StoreSDNode instead of "
97 "AtomicSDNode for unordered atomic loads and "
98 "stores respectively."),
99 cl::Hidden);
100
101/// Call this when the user attempts to do something unsupported, like
102/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
103/// report_fatal_error, so calling code should attempt to recover without
104/// crashing.
105static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
106 const char *Msg) {
107 MachineFunction &MF = DAG.getMachineFunction();
108 DAG.getContext()->diagnose(
109 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
110}
111
112X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
113 const X86Subtarget &STI)
114 : TargetLowering(TM), Subtarget(STI) {
115 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
116 X86ScalarSSEf64 = Subtarget.hasSSE2();
117 X86ScalarSSEf32 = Subtarget.hasSSE1();
118 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
119
120 // Set up the TargetLowering object.
121
122 // X86 is weird. It always uses i8 for shift amounts and setcc results.
123 setBooleanContents(ZeroOrOneBooleanContent);
124 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
125 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
126
127 // For 64-bit, since we have so many registers, use the ILP scheduler.
128 // For 32-bit, use the register pressure specific scheduling.
129 // For Atom, always use ILP scheduling.
130 if (Subtarget.isAtom())
131 setSchedulingPreference(Sched::ILP);
132 else if (Subtarget.is64Bit())
133 setSchedulingPreference(Sched::ILP);
134 else
135 setSchedulingPreference(Sched::RegPressure);
136 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
137 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
138
139 // Bypass expensive divides and use cheaper ones.
140 if (TM.getOptLevel() >= CodeGenOpt::Default) {
141 if (Subtarget.hasSlowDivide32())
142 addBypassSlowDiv(32, 8);
143 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
144 addBypassSlowDiv(64, 32);
145 }
146
147 // Setup Windows compiler runtime calls.
148 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
149 static const struct {
150 const RTLIB::Libcall Op;
151 const char * const Name;
152 const CallingConv::ID CC;
153 } LibraryCalls[] = {
154 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
155 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
156 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
157 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
158 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
159 };
160
161 for (const auto &LC : LibraryCalls) {
162 setLibcallName(LC.Op, LC.Name);
163 setLibcallCallingConv(LC.Op, LC.CC);
164 }
165 }
166
167 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
168 // MSVCRT doesn't have powi; fall back to pow
169 setLibcallName(RTLIB::POWI_F32, nullptr);
170 setLibcallName(RTLIB::POWI_F64, nullptr);
171 }
172
173 // If we don't have cmpxchg8b(meaning this is a 386/486), limit atomic size to
174 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
175 // FIXME: Should we be limiting the atomic size on other configs? Default is
176 // 1024.
177 if (!Subtarget.hasCmpxchg8b())
178 setMaxAtomicSizeInBitsSupported(32);
179
180 // Set up the register classes.
181 addRegisterClass(MVT::i8, &X86::GR8RegClass);
182 addRegisterClass(MVT::i16, &X86::GR16RegClass);
183 addRegisterClass(MVT::i32, &X86::GR32RegClass);
184 if (Subtarget.is64Bit())
185 addRegisterClass(MVT::i64, &X86::GR64RegClass);
186
187 for (MVT VT : MVT::integer_valuetypes())
188 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
189
190 // We don't accept any truncstore of integer registers.
191 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
192 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
193 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
194 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
195 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
196 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
197
198 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
199
200 // SETOEQ and SETUNE require checking two conditions.
201 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
202 setCondCodeAction(ISD::SETOEQ, VT, Expand);
203 setCondCodeAction(ISD::SETUNE, VT, Expand);
204 }
205
206 // Integer absolute.
207 if (Subtarget.hasCMov()) {
208 setOperationAction(ISD::ABS , MVT::i16 , Custom);
209 setOperationAction(ISD::ABS , MVT::i32 , Custom);
210 if (Subtarget.is64Bit())
211 setOperationAction(ISD::ABS , MVT::i64 , Custom);
212 }
213
214 // Funnel shifts.
215 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
216 // For slow shld targets we only lower for code size.
217 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
218
219 setOperationAction(ShiftOp , MVT::i8 , Custom);
220 setOperationAction(ShiftOp , MVT::i16 , Custom);
221 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
222 if (Subtarget.is64Bit())
223 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
224 }
225
226 if (!Subtarget.useSoftFloat()) {
227 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
228 // operation.
229 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
230 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
231 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
232 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
233 // We have an algorithm for SSE2, and we turn this into a 64-bit
234 // FILD or VCVTUSI2SS/SD for other targets.
235 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
236 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
237 // We have an algorithm for SSE2->double, and we turn this into a
238 // 64-bit FILD followed by conditional FADD for other targets.
239 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
240 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
241
242 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
243 // this operation.
244 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
245 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
246 // SSE has no i16 to fp conversion, only i32. We promote in the handler
247 // to allow f80 to use i16 and f64 to use i16 with sse1 only
248 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
249 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
250 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
251 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
252 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
253 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
254 // are Legal, f80 is custom lowered.
255 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
256 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
257
258 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
259 // this operation.
260 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
261 // FIXME: This doesn't generate invalid exception when it should. PR44019.
262 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
263 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
264 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
265 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
266 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
267 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
268 // are Legal, f80 is custom lowered.
269 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
270 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
271
272 // Handle FP_TO_UINT by promoting the destination to a larger signed
273 // conversion.
274 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
275 // FIXME: This doesn't generate invalid exception when it should. PR44019.
276 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
277 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
278 // FIXME: This doesn't generate invalid exception when it should. PR44019.
279 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
280 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
281 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
282 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
283 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
284
285 setOperationAction(ISD::LRINT, MVT::f32, Custom);
286 setOperationAction(ISD::LRINT, MVT::f64, Custom);
287 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
288 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
289
290 if (!Subtarget.is64Bit()) {
291 setOperationAction(ISD::LRINT, MVT::i64, Custom);
292 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
293 }
294 }
295
296 if (Subtarget.hasSSE2()) {
297 // Custom lowering for saturating float to int conversions.
298 // We handle promotion to larger result types manually.
299 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
300 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
301 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
302 }
303 if (Subtarget.is64Bit()) {
304 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
305 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
306 }
307 }
308
309 // Handle address space casts between mixed sized pointers.
310 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
311 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
312
313 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
314 if (!X86ScalarSSEf64) {
315 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
316 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
317 if (Subtarget.is64Bit()) {
318 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
319 // Without SSE, i64->f64 goes through memory.
320 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
321 }
322 } else if (!Subtarget.is64Bit())
323 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
324
325 // Scalar integer divide and remainder are lowered to use operations that
326 // produce two results, to match the available instructions. This exposes
327 // the two-result form to trivial CSE, which is able to combine x/y and x%y
328 // into a single instruction.
329 //
330 // Scalar integer multiply-high is also lowered to use two-result
331 // operations, to match the available instructions. However, plain multiply
332 // (low) operations are left as Legal, as there are single-result
333 // instructions for this in x86. Using the two-result multiply instructions
334 // when both high and low results are needed must be arranged by dagcombine.
335 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
336 setOperationAction(ISD::MULHS, VT, Expand);
337 setOperationAction(ISD::MULHU, VT, Expand);
338 setOperationAction(ISD::SDIV, VT, Expand);
339 setOperationAction(ISD::UDIV, VT, Expand);
340 setOperationAction(ISD::SREM, VT, Expand);
341 setOperationAction(ISD::UREM, VT, Expand);
342 }
343
344 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
345 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
346 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
347 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
348 setOperationAction(ISD::BR_CC, VT, Expand);
349 setOperationAction(ISD::SELECT_CC, VT, Expand);
350 }
351 if (Subtarget.is64Bit())
352 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
353 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
354 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
355 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
356
357 setOperationAction(ISD::FREM , MVT::f32 , Expand);
358 setOperationAction(ISD::FREM , MVT::f64 , Expand);
359 setOperationAction(ISD::FREM , MVT::f80 , Expand);
360 setOperationAction(ISD::FREM , MVT::f128 , Expand);
361 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
362
363 // Promote the i8 variants and force them on up to i32 which has a shorter
364 // encoding.
365 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
366 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
367 if (!Subtarget.hasBMI()) {
368 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
369 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
370 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
371 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
372 if (Subtarget.is64Bit()) {
373 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
374 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
375 }
376 }
377
378 if (Subtarget.hasLZCNT()) {
379 // When promoting the i8 variants, force them to i32 for a shorter
380 // encoding.
381 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
382 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
383 } else {
384 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
385 if (VT == MVT::i64 && !Subtarget.is64Bit())
386 continue;
387 setOperationAction(ISD::CTLZ , VT, Custom);
388 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
389 }
390 }
391
392 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
393 ISD::STRICT_FP_TO_FP16}) {
394 // Special handling for half-precision floating point conversions.
395 // If we don't have F16C support, then lower half float conversions
396 // into library calls.
397 setOperationAction(
398 Op, MVT::f32,
399 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
400 // There's never any support for operations beyond MVT::f32.
401 setOperationAction(Op, MVT::f64, Expand);
402 setOperationAction(Op, MVT::f80, Expand);
403 setOperationAction(Op, MVT::f128, Expand);
404 }
405
406 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
407 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
408 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
409 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
410 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
411 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
412 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
413 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
414
415 setOperationAction(ISD::PARITY, MVT::i8, Custom);
416 if (Subtarget.hasPOPCNT()) {
417 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
418 } else {
419 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
420 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
421 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
422 if (Subtarget.is64Bit())
423 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
424 else
425 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
426
427 setOperationAction(ISD::PARITY, MVT::i16, Custom);
428 setOperationAction(ISD::PARITY, MVT::i32, Custom);
429 if (Subtarget.is64Bit())
430 setOperationAction(ISD::PARITY, MVT::i64, Custom);
431 }
432
433 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
434
435 if (!Subtarget.hasMOVBE())
436 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
437
438 // X86 wants to expand cmov itself.
439 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
440 setOperationAction(ISD::SELECT, VT, Custom);
441 setOperationAction(ISD::SETCC, VT, Custom);
442 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
443 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
444 }
445 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
446 if (VT == MVT::i64 && !Subtarget.is64Bit())
447 continue;
448 setOperationAction(ISD::SELECT, VT, Custom);
449 setOperationAction(ISD::SETCC, VT, Custom);
450 }
451
452 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
453 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
454 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
455
456 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
457 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
458 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
459 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
460 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
461 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
462 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
463 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
464
465 // Darwin ABI issue.
466 for (auto VT : { MVT::i32, MVT::i64 }) {
467 if (VT == MVT::i64 && !Subtarget.is64Bit())
468 continue;
469 setOperationAction(ISD::ConstantPool , VT, Custom);
470 setOperationAction(ISD::JumpTable , VT, Custom);
471 setOperationAction(ISD::GlobalAddress , VT, Custom);
472 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
473 setOperationAction(ISD::ExternalSymbol , VT, Custom);
474 setOperationAction(ISD::BlockAddress , VT, Custom);
475 }
476
477 // 64-bit shl, sra, srl (iff 32-bit x86)
478 for (auto VT : { MVT::i32, MVT::i64 }) {
479 if (VT == MVT::i64 && !Subtarget.is64Bit())
480 continue;
481 setOperationAction(ISD::SHL_PARTS, VT, Custom);
482 setOperationAction(ISD::SRA_PARTS, VT, Custom);
483 setOperationAction(ISD::SRL_PARTS, VT, Custom);
484 }
485
486 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
487 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
488
489 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
490
491 // Expand certain atomics
492 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
493 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
494 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
495 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
496 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
497 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
498 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
499 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
500 }
501
502 if (!Subtarget.is64Bit())
503 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
504
505 if (Subtarget.hasCmpxchg16b()) {
506 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
507 }
508
509 // FIXME - use subtarget debug flags
510 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
511 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
512 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
513 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
514 }
515
516 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
517 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
518
519 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
520 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
521
522 setOperationAction(ISD::TRAP, MVT::Other, Legal);
523 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
524 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
525
526 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
527 setOperationAction(ISD::VASTART , MVT::Other, Custom);
528 setOperationAction(ISD::VAEND , MVT::Other, Expand);
529 bool Is64Bit = Subtarget.is64Bit();
530 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
531 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
532
533 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
534 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
535
536 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
537
538 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
539 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
540 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
541
542 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
543 // f32 and f64 use SSE.
544 // Set up the FP register classes.
545 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
546 : &X86::FR32RegClass);
547 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
548 : &X86::FR64RegClass);
549
550 // Disable f32->f64 extload as we can only generate this in one instruction
551 // under optsize. So its easier to pattern match (fpext (load)) for that
552 // case instead of needing to emit 2 instructions for extload in the
553 // non-optsize case.
554 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
555
556 for (auto VT : { MVT::f32, MVT::f64 }) {
557 // Use ANDPD to simulate FABS.
558 setOperationAction(ISD::FABS, VT, Custom);
559
560 // Use XORP to simulate FNEG.
561 setOperationAction(ISD::FNEG, VT, Custom);
562
563 // Use ANDPD and ORPD to simulate FCOPYSIGN.
564 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
565
566 // These might be better off as horizontal vector ops.
567 setOperationAction(ISD::FADD, VT, Custom);
568 setOperationAction(ISD::FSUB, VT, Custom);
569
570 // We don't support sin/cos/fmod
571 setOperationAction(ISD::FSIN , VT, Expand);
572 setOperationAction(ISD::FCOS , VT, Expand);
573 setOperationAction(ISD::FSINCOS, VT, Expand);
574 }
575
576 // Lower this to MOVMSK plus an AND.
577 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
578 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
579
580 } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
581 (UseX87 || Is64Bit)) {
582 // Use SSE for f32, x87 for f64.
583 // Set up the FP register classes.
584 addRegisterClass(MVT::f32, &X86::FR32RegClass);
585 if (UseX87)
586 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
587
588 // Use ANDPS to simulate FABS.
589 setOperationAction(ISD::FABS , MVT::f32, Custom);
590
591 // Use XORP to simulate FNEG.
592 setOperationAction(ISD::FNEG , MVT::f32, Custom);
593
594 if (UseX87)
595 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
596
597 // Use ANDPS and ORPS to simulate FCOPYSIGN.
598 if (UseX87)
599 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
600 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
601
602 // We don't support sin/cos/fmod
603 setOperationAction(ISD::FSIN , MVT::f32, Expand);
604 setOperationAction(ISD::FCOS , MVT::f32, Expand);
605 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
606
607 if (UseX87) {
608 // Always expand sin/cos functions even though x87 has an instruction.
609 setOperationAction(ISD::FSIN, MVT::f64, Expand);
610 setOperationAction(ISD::FCOS, MVT::f64, Expand);
611 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
612 }
613 } else if (UseX87) {
614 // f32 and f64 in x87.
615 // Set up the FP register classes.
616 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
617 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
618
619 for (auto VT : { MVT::f32, MVT::f64 }) {
620 setOperationAction(ISD::UNDEF, VT, Expand);
621 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
622
623 // Always expand sin/cos functions even though x87 has an instruction.
624 setOperationAction(ISD::FSIN , VT, Expand);
625 setOperationAction(ISD::FCOS , VT, Expand);
626 setOperationAction(ISD::FSINCOS, VT, Expand);
627 }
628 }
629
630 // Expand FP32 immediates into loads from the stack, save special cases.
631 if (isTypeLegal(MVT::f32)) {
632 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
633 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
634 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
635 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
636 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
637 } else // SSE immediates.
638 addLegalFPImmediate(APFloat(+0.0f)); // xorps
639 }
640 // Expand FP64 immediates into loads from the stack, save special cases.
641 if (isTypeLegal(MVT::f64)) {
642 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
643 addLegalFPImmediate(APFloat(+0.0)); // FLD0
644 addLegalFPImmediate(APFloat(+1.0)); // FLD1
645 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
646 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
647 } else // SSE immediates.
648 addLegalFPImmediate(APFloat(+0.0)); // xorpd
649 }
650 // Handle constrained floating-point operations of scalar.
651 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
652 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
653 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
654 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
655 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
656 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
657 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
658 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
659 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
660 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
661 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
662 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
663 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
664
665 // We don't support FMA.
666 setOperationAction(ISD::FMA, MVT::f64, Expand);
667 setOperationAction(ISD::FMA, MVT::f32, Expand);
668
669 // f80 always uses X87.
670 if (UseX87) {
671 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
672 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
673 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
674 {
675 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
676 addLegalFPImmediate(TmpFlt); // FLD0
677 TmpFlt.changeSign();
678 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
679
680 bool ignored;
681 APFloat TmpFlt2(+1.0);
682 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
683 &ignored);
684 addLegalFPImmediate(TmpFlt2); // FLD1
685 TmpFlt2.changeSign();
686 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
687 }
688
689 // Always expand sin/cos functions even though x87 has an instruction.
690 setOperationAction(ISD::FSIN , MVT::f80, Expand);
691 setOperationAction(ISD::FCOS , MVT::f80, Expand);
692 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
693
694 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
695 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
696 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
697 setOperationAction(ISD::FRINT, MVT::f80, Expand);
698 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
699 setOperationAction(ISD::FMA, MVT::f80, Expand);
700 setOperationAction(ISD::LROUND, MVT::f80, Expand);
701 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
702 setOperationAction(ISD::LRINT, MVT::f80, Custom);
703 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
704
705 // Handle constrained floating-point operations of scalar.
706 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
707 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
708 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
709 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
710 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
711 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
712 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
713 // as Custom.
714 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
715 }
716
717 // f128 uses xmm registers, but most operations require libcalls.
718 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
719 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
720 : &X86::VR128RegClass);
721
722 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
723
724 setOperationAction(ISD::FADD, MVT::f128, LibCall);
725 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
726 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
727 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
728 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
729 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
730 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
731 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
732 setOperationAction(ISD::FMA, MVT::f128, LibCall);
733 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
734
735 setOperationAction(ISD::FABS, MVT::f128, Custom);
736 setOperationAction(ISD::FNEG, MVT::f128, Custom);
737 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
738
739 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
740 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
741 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
742 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
743 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
744 // No STRICT_FSINCOS
745 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
746 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
747
748 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
749 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
750 // We need to custom handle any FP_ROUND with an f128 input, but
751 // LegalizeDAG uses the result type to know when to run a custom handler.
752 // So we have to list all legal floating point result types here.
753 if (isTypeLegal(MVT::f32)) {
754 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
755 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
756 }
757 if (isTypeLegal(MVT::f64)) {
758 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
759 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
760 }
761 if (isTypeLegal(MVT::f80)) {
762 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
763 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
764 }
765
766 setOperationAction(ISD::SETCC, MVT::f128, Custom);
767
768 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
769 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
770 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
771 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
772 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
773 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
774 }
775
776 // Always use a library call for pow.
777 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
778 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
779 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
780 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
781
782 setOperationAction(ISD::FLOG, MVT::f80, Expand);
783 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
784 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
785 setOperationAction(ISD::FEXP, MVT::f80, Expand);
786 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
787 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
788 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
789
790 // Some FP actions are always expanded for vector types.
791 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
792 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
793 setOperationAction(ISD::FSIN, VT, Expand);
794 setOperationAction(ISD::FSINCOS, VT, Expand);
795 setOperationAction(ISD::FCOS, VT, Expand);
796 setOperationAction(ISD::FREM, VT, Expand);
797 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
798 setOperationAction(ISD::FPOW, VT, Expand);
799 setOperationAction(ISD::FLOG, VT, Expand);
800 setOperationAction(ISD::FLOG2, VT, Expand);
801 setOperationAction(ISD::FLOG10, VT, Expand);
802 setOperationAction(ISD::FEXP, VT, Expand);
803 setOperationAction(ISD::FEXP2, VT, Expand);
804 }
805
806 // First set operation action for all vector types to either promote
807 // (for widening) or expand (for scalarization). Then we will selectively
808 // turn on ones that can be effectively codegen'd.
809 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
810 setOperationAction(ISD::SDIV, VT, Expand);
811 setOperationAction(ISD::UDIV, VT, Expand);
812 setOperationAction(ISD::SREM, VT, Expand);
813 setOperationAction(ISD::UREM, VT, Expand);
814 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
815 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
816 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
817 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
818 setOperationAction(ISD::FMA, VT, Expand);
819 setOperationAction(ISD::FFLOOR, VT, Expand);
820 setOperationAction(ISD::FCEIL, VT, Expand);
821 setOperationAction(ISD::FTRUNC, VT, Expand);
822 setOperationAction(ISD::FRINT, VT, Expand);
823 setOperationAction(ISD::FNEARBYINT, VT, Expand);
824 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
825 setOperationAction(ISD::MULHS, VT, Expand);
826 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
827 setOperationAction(ISD::MULHU, VT, Expand);
828 setOperationAction(ISD::SDIVREM, VT, Expand);
829 setOperationAction(ISD::UDIVREM, VT, Expand);
830 setOperationAction(ISD::CTPOP, VT, Expand);
831 setOperationAction(ISD::CTTZ, VT, Expand);
832 setOperationAction(ISD::CTLZ, VT, Expand);
833 setOperationAction(ISD::ROTL, VT, Expand);
834 setOperationAction(ISD::ROTR, VT, Expand);
835 setOperationAction(ISD::BSWAP, VT, Expand);
836 setOperationAction(ISD::SETCC, VT, Expand);
837 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
838 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
839 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
840 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
841 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
842 setOperationAction(ISD::TRUNCATE, VT, Expand);
843 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
844 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
845 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
846 setOperationAction(ISD::SELECT_CC, VT, Expand);
847 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
848 setTruncStoreAction(InnerVT, VT, Expand);
849
850 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
851 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
852
853 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
854 // types, we have to deal with them whether we ask for Expansion or not.
855 // Setting Expand causes its own optimisation problems though, so leave
856 // them legal.
857 if (VT.getVectorElementType() == MVT::i1)
858 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
859
860 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
861 // split/scalarized right now.
862 if (VT.getVectorElementType() == MVT::f16)
863 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
864 }
865 }
866
867 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
868 // with -msoft-float, disable use of MMX as well.
869 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
870 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
871 // No operations on x86mmx supported, everything uses intrinsics.
872 }
873
874 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
875 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
876 : &X86::VR128RegClass);
877
878 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
879 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
880 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
881 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
882 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
883 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
884 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
885 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
886
887 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
888 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
889
890 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
891 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
892 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
893 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
894 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
895 }
896
897 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
898 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
899 : &X86::VR128RegClass);
900
901 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
902 // registers cannot be used even for integer operations.
903 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
904 : &X86::VR128RegClass);
905 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
906 : &X86::VR128RegClass);
907 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
908 : &X86::VR128RegClass);
909 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
910 : &X86::VR128RegClass);
911
912 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
913 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
914 setOperationAction(ISD::SDIV, VT, Custom);
915 setOperationAction(ISD::SREM, VT, Custom);
916 setOperationAction(ISD::UDIV, VT, Custom);
917 setOperationAction(ISD::UREM, VT, Custom);
918 }
919
920 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
921 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
922 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
923
924 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
925 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
926 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
927 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
928 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
929 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
930 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
931 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
932 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
933 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
934 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
935 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
936 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
937
938 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
939 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
940 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
941 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
942 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
943 }
944
945 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
946 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
947 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
948 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
949 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
950 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
951 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
952 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
953 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
954 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
955
956 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
957 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
958 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
959
960 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
961 setOperationAction(ISD::SETCC, VT, Custom);
962 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
963 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
964 setOperationAction(ISD::CTPOP, VT, Custom);
965 setOperationAction(ISD::ABS, VT, Custom);
966
967 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
968 // setcc all the way to isel and prefer SETGT in some isel patterns.
969 setCondCodeAction(ISD::SETLT, VT, Custom);
970 setCondCodeAction(ISD::SETLE, VT, Custom);
971 }
972
973 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
974 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
975 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
976 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
977 setOperationAction(ISD::VSELECT, VT, Custom);
978 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
979 }
980
981 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
982 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
983 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
984 setOperationAction(ISD::VSELECT, VT, Custom);
985
986 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
987 continue;
988
989 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
990 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
991 }
992
993 // Custom lower v2i64 and v2f64 selects.
994 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
995 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
996 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
997 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
998 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
999
1000 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1001 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1002 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
1003 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1004
1005 // Custom legalize these to avoid over promotion or custom promotion.
1006 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1007 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1008 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1009 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1010 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1011 }
1012
1013 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1014 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
1015 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1016 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1017
1018 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1019 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1020
1021 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1022 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1023
1024 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1025 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1026 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1027 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1028 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1029
1030 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1031 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1032 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1033 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1034
1035 // We want to legalize this to an f64 load rather than an i64 load on
1036 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1037 // store.
1038 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1039 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1040 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1041 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1042 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1043 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1044
1045 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1046 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1047 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1048 if (!Subtarget.hasAVX512())
1049 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1050
1051 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1052 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1053 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1054
1055 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1056
1057 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1058 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1059 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1060 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1061 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1062 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1063
1064 // In the customized shift lowering, the legal v4i32/v2i64 cases
1065 // in AVX2 will be recognized.
1066 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1067 setOperationAction(ISD::SRL, VT, Custom);
1068 setOperationAction(ISD::SHL, VT, Custom);
1069 setOperationAction(ISD::SRA, VT, Custom);
1070 }
1071
1072 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1073 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1074
1075 // With 512-bit registers or AVX512VL+BW, expanding (and promoting the
1076 // shifts) is better.
1077 if (!Subtarget.useAVX512Regs() &&
1078 !(Subtarget.hasBWI() && Subtarget.hasVLX()))
1079 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1080
1081 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1082 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1083 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1084 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1085 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1086 }
1087
1088 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1089 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1090 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1091 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1092 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1093 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1094 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1095 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1096 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1097
1098 // These might be better off as horizontal vector ops.
1099 setOperationAction(ISD::ADD, MVT::i16, Custom);
1100 setOperationAction(ISD::ADD, MVT::i32, Custom);
1101 setOperationAction(ISD::SUB, MVT::i16, Custom);
1102 setOperationAction(ISD::SUB, MVT::i32, Custom);
1103 }
1104
1105 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1106 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1107 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1108 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1109 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1110 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1111 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1112 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1113 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1114 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1115 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1116 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1117 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1118 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1119
1120 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1121 }
1122
1123 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1124 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1125 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1126 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1127 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1128 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1129 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1130 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1131
1132 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1133
1134 // FIXME: Do we need to handle scalar-to-vector here?
1135 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1136
1137 // We directly match byte blends in the backend as they match the VSELECT
1138 // condition form.
1139 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1140
1141 // SSE41 brings specific instructions for doing vector sign extend even in
1142 // cases where we don't have SRA.
1143 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1144 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1145 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1146 }
1147
1148 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1149 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1150 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1151 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1152 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1153 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1154 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1155 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1156 }
1157
1158 // i8 vectors are custom because the source register and source
1159 // memory operand types are not the same width.
1160 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1161
1162 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1163 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1164 // do the pre and post work in the vector domain.
1165 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1166 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1167 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1168 // so that DAG combine doesn't try to turn it into uint_to_fp.
1169 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1170 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1171 }
1172 }
1173
1174 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1175 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1176 }
1177
1178 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1179 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1180 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1181 setOperationAction(ISD::ROTL, VT, Custom);
1182
1183 // XOP can efficiently perform BITREVERSE with VPPERM.
1184 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1185 setOperationAction(ISD::BITREVERSE, VT, Custom);
1186
1187 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1188 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1189 setOperationAction(ISD::BITREVERSE, VT, Custom);
1190 }
1191
1192 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1193 bool HasInt256 = Subtarget.hasInt256();
1194
1195 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1196 : &X86::VR256RegClass);
1197 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1198 : &X86::VR256RegClass);
1199 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1200 : &X86::VR256RegClass);
1201 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1202 : &X86::VR256RegClass);
1203 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1204 : &X86::VR256RegClass);
1205 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1206 : &X86::VR256RegClass);
1207
1208 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1209 setOperationAction(ISD::FFLOOR, VT, Legal);
1210 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1211 setOperationAction(ISD::FCEIL, VT, Legal);
1212 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1213 setOperationAction(ISD::FTRUNC, VT, Legal);
1214 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1215 setOperationAction(ISD::FRINT, VT, Legal);
1216 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1217 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1218 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1219 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1220 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1221
1222 setOperationAction(ISD::FROUND, VT, Custom);
1223
1224 setOperationAction(ISD::FNEG, VT, Custom);
1225 setOperationAction(ISD::FABS, VT, Custom);
1226 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1227 }
1228
1229 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1230 // even though v8i16 is a legal type.
1231 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1232 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1233 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1234 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1235 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1236 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
1237
1238 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1239 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1240
1241 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1242 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1243 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1244 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1245 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1246 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1247 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1248 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1249 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1250 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1251 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1252 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1253
1254 if (!Subtarget.hasAVX512())
1255 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1256
1257 // In the customized shift lowering, the legal v8i32/v4i64 cases
1258 // in AVX2 will be recognized.
1259 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1260 setOperationAction(ISD::SRL, VT, Custom);
1261 setOperationAction(ISD::SHL, VT, Custom);
1262 setOperationAction(ISD::SRA, VT, Custom);
1263 }
1264
1265 // These types need custom splitting if their input is a 128-bit vector.
1266 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1267 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1268 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1269 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1270
1271 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1272 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1273
1274 // With BWI, expanding (and promoting the shifts) is better.
1275 if (!Subtarget.useBWIRegs())
1276 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1277
1278 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1279 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1280 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1281 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1282 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1283 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1284
1285 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1286 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1287 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1288 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1289 }
1290
1291 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1292 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1293 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1294 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1295
1296 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1297 setOperationAction(ISD::SETCC, VT, Custom);
1298 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1299 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1300 setOperationAction(ISD::CTPOP, VT, Custom);
1301 setOperationAction(ISD::CTLZ, VT, Custom);
1302
1303 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1304 // setcc all the way to isel and prefer SETGT in some isel patterns.
1305 setCondCodeAction(ISD::SETLT, VT, Custom);
1306 setCondCodeAction(ISD::SETLE, VT, Custom);
1307 }
1308
1309 if (Subtarget.hasAnyFMA()) {
1310 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1311 MVT::v2f64, MVT::v4f64 }) {
1312 setOperationAction(ISD::FMA, VT, Legal);
1313 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1314 }
1315 }
1316
1317 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1318 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1319 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1320 }
1321
1322 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1323 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1324 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1325 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1326
1327 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1328 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1329 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1330 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1331 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1332 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1333
1334 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1335 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1336 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1337 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1338 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1339
1340 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1341 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1342 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1343 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1344 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1345 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1346 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1347 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1348 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1349 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1350 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1351 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1352
1353 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1354 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1355 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1356 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1357 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1358 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1359 }
1360
1361 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1362 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1363 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1364 }
1365
1366 if (HasInt256) {
1367 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1368 // when we have a 256bit-wide blend with immediate.
1369 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1370 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1371
1372 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1373 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1374 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1375 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1376 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1377 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1378 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1379 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1380 }
1381 }
1382
1383 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1384 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1385 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1386 setOperationAction(ISD::MSTORE, VT, Legal);
1387 }
1388
1389 // Extract subvector is special because the value type
1390 // (result) is 128-bit but the source is 256-bit wide.
1391 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1392 MVT::v4f32, MVT::v2f64 }) {
1393 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1394 }
1395
1396 // Custom lower several nodes for 256-bit types.
1397 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1398 MVT::v8f32, MVT::v4f64 }) {
1399 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1400 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1401 setOperationAction(ISD::VSELECT, VT, Custom);
1402 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1403 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1404 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1405 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1406 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1407 setOperationAction(ISD::STORE, VT, Custom);
1408 }
1409
1410 if (HasInt256) {
1411 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1412
1413 // Custom legalize 2x32 to get a little better code.
1414 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1415 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1416
1417 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1418 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1419 setOperationAction(ISD::MGATHER, VT, Custom);
1420 }
1421 }
1422
1423 // This block controls legalization of the mask vector sizes that are
1424 // available with AVX512. 512-bit vectors are in a separate block controlled
1425 // by useAVX512Regs.
1426 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1427 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1428 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1429 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1430 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1431 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1432
1433 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1434 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1435 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1436
1437 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1438 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1439 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1440 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1441 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1442 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1443 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1444 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1445 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1446 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1447 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1448 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1449
1450 // There is no byte sized k-register load or store without AVX512DQ.
1451 if (!Subtarget.hasDQI()) {
1452 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1453 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1454 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1455 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1456
1457 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1458 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1459 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1460 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1461 }
1462
1463 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1464 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1465 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1466 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1467 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1468 }
1469
1470 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1471 setOperationAction(ISD::ADD, VT, Custom);
1472 setOperationAction(ISD::SUB, VT, Custom);
1473 setOperationAction(ISD::MUL, VT, Custom);
1474 setOperationAction(ISD::UADDSAT, VT, Custom);
1475 setOperationAction(ISD::SADDSAT, VT, Custom);
1476 setOperationAction(ISD::USUBSAT, VT, Custom);
1477 setOperationAction(ISD::SSUBSAT, VT, Custom);
1478 setOperationAction(ISD::VSELECT, VT, Expand);
1479 }
1480
1481 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1482 setOperationAction(ISD::SETCC, VT, Custom);
1483 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1484 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1485 setOperationAction(ISD::SELECT, VT, Custom);
1486 setOperationAction(ISD::TRUNCATE, VT, Custom);
1487
1488 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1489 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1490 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1491 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1492 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1493 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1494 }
1495
1496 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1497 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1498 }
1499
1500 // This block controls legalization for 512-bit operations with 32/64 bit
1501 // elements. 512-bits can be disabled based on prefer-vector-width and
1502 // required-vector-width function attributes.
1503 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1504 bool HasBWI = Subtarget.hasBWI();
1505
1506 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1507 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1508 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1509 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1510 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1511 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1512
1513 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1514 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1515 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1516 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1517 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1518 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1519 if (HasBWI)
1520 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1521 }
1522
1523 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1524 setOperationAction(ISD::FNEG, VT, Custom);
1525 setOperationAction(ISD::FABS, VT, Custom);
1526 setOperationAction(ISD::FMA, VT, Legal);
1527 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1528 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1529 }
1530
1531 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1532 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1533 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1534 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1535 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1536 }
1537 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1538 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1539 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1540 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1541 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1542 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1543 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1544 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1545
1546 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1547 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1548 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1549 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1550 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1551 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1552 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1553 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1554 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1555 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1556 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1557 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1558
1559 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1560 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1561 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1562 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1563 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1564 if (HasBWI)
1565 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1566
1567 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1568 // to 512-bit rather than use the AVX2 instructions so that we can use
1569 // k-masks.
1570 if (!Subtarget.hasVLX()) {
1571 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1572 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1573 setOperationAction(ISD::MLOAD, VT, Custom);
1574 setOperationAction(ISD::MSTORE, VT, Custom);
1575 }
1576 }
1577
1578 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1579 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1580 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1581 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1582 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1583 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1584 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1585 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1586 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1587 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1588 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1589 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1590 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1591
1592 if (HasBWI) {
1593 // Extends from v64i1 masks to 512-bit vectors.
1594 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1595 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1596 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1597 }
1598
1599 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1600 setOperationAction(ISD::FFLOOR, VT, Legal);
1601 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1602 setOperationAction(ISD::FCEIL, VT, Legal);
1603 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1604 setOperationAction(ISD::FTRUNC, VT, Legal);
1605 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1606 setOperationAction(ISD::FRINT, VT, Legal);
1607 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1608 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1609 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1610 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1611 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1612
1613 setOperationAction(ISD::FROUND, VT, Custom);
1614 }
1615
1616 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1617 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1618 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1619 }
1620
1621 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1622 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1623 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1624 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1625
1626 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1627 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1628 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1629 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1630
1631 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1632 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1633 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1634 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1635 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1636 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1637
1638 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1639
1640 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1641 setOperationAction(ISD::SRL, VT, Custom);
1642 setOperationAction(ISD::SHL, VT, Custom);
1643 setOperationAction(ISD::SRA, VT, Custom);
1644 setOperationAction(ISD::SETCC, VT, Custom);
1645
1646 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1647 // setcc all the way to isel and prefer SETGT in some isel patterns.
1648 setCondCodeAction(ISD::SETLT, VT, Custom);
1649 setCondCodeAction(ISD::SETLE, VT, Custom);
1650 }
1651 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1652 setOperationAction(ISD::SMAX, VT, Legal);
1653 setOperationAction(ISD::UMAX, VT, Legal);
1654 setOperationAction(ISD::SMIN, VT, Legal);
1655 setOperationAction(ISD::UMIN, VT, Legal);
1656 setOperationAction(ISD::ABS, VT, Legal);
1657 setOperationAction(ISD::CTPOP, VT, Custom);
1658 setOperationAction(ISD::ROTL, VT, Custom);
1659 setOperationAction(ISD::ROTR, VT, Custom);
1660 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1661 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1662 }
1663
1664 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1665 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1666 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1667 setOperationAction(ISD::CTLZ, VT, Custom);
1668 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1669 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1670 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1671 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1672 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1673 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1674 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1675 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1676 }
1677
1678 if (Subtarget.hasDQI()) {
1679 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1680 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1681 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1682 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1683 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1684 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1685 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1686 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1687
1688 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1689 }
1690
1691 if (Subtarget.hasCDI()) {
1692 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1693 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1694 setOperationAction(ISD::CTLZ, VT, Legal);
1695 }
1696 } // Subtarget.hasCDI()
1697
1698 if (Subtarget.hasVPOPCNTDQ()) {
1699 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1700 setOperationAction(ISD::CTPOP, VT, Legal);
1701 }
1702
1703 // Extract subvector is special because the value type
1704 // (result) is 256-bit but the source is 512-bit wide.
1705 // 128-bit was made Legal under AVX1.
1706 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1707 MVT::v8f32, MVT::v4f64 })
1708 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1709
1710 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1711 MVT::v16f32, MVT::v8f64 }) {
1712 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1713 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1714 setOperationAction(ISD::SELECT, VT, Custom);
1715 setOperationAction(ISD::VSELECT, VT, Custom);
1716 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1717 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1718 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1719 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1720 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1721 }
1722
1723 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1724 setOperationAction(ISD::MLOAD, VT, Legal);
1725 setOperationAction(ISD::MSTORE, VT, Legal);
1726 setOperationAction(ISD::MGATHER, VT, Custom);
1727 setOperationAction(ISD::MSCATTER, VT, Custom);
1728 }
1729 if (HasBWI) {
1730 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1731 setOperationAction(ISD::MLOAD, VT, Legal);
1732 setOperationAction(ISD::MSTORE, VT, Legal);
1733 }
1734 } else {
1735 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1736 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1737 }
1738
1739 if (Subtarget.hasVBMI2()) {
1740 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1741 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1742 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1743 setOperationAction(ISD::FSHL, VT, Custom);
1744 setOperationAction(ISD::FSHR, VT, Custom);
1745 }
1746
1747 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1748 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1749 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1750 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1751 }
1752 }// useAVX512Regs
1753
1754 // This block controls legalization for operations that don't have
1755 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1756 // narrower widths.
1757 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1758 // These operations are handled on non-VLX by artificially widening in
1759 // isel patterns.
1760
1761 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1762 Subtarget.hasVLX() ? Legal : Custom);
1763 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1764 Subtarget.hasVLX() ? Legal : Custom);
1765 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1766 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1767 Subtarget.hasVLX() ? Legal : Custom);
1768 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1769 Subtarget.hasVLX() ? Legal : Custom);
1770 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1771 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1772 Subtarget.hasVLX() ? Legal : Custom);
1773 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1774 Subtarget.hasVLX() ? Legal : Custom);
1775 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1776 Subtarget.hasVLX() ? Legal : Custom);
1777 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1778 Subtarget.hasVLX() ? Legal : Custom);
1779
1780 if (Subtarget.hasDQI()) {
1781 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1782 // v2f32 UINT_TO_FP is already custom under SSE2.
1783 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1785, __PRETTY_FUNCTION__))
1784 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1785, __PRETTY_FUNCTION__))
1785 "Unexpected operation action!")((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1785, __PRETTY_FUNCTION__))
;
1786 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1787 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1788 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1789 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1790 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1791 }
1792
1793 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1794 setOperationAction(ISD::SMAX, VT, Legal);
1795 setOperationAction(ISD::UMAX, VT, Legal);
1796 setOperationAction(ISD::SMIN, VT, Legal);
1797 setOperationAction(ISD::UMIN, VT, Legal);
1798 setOperationAction(ISD::ABS, VT, Legal);
1799 }
1800
1801 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1802 setOperationAction(ISD::ROTL, VT, Custom);
1803 setOperationAction(ISD::ROTR, VT, Custom);
1804 }
1805
1806 // Custom legalize 2x32 to get a little better code.
1807 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1808 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1809
1810 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1811 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1812 setOperationAction(ISD::MSCATTER, VT, Custom);
1813
1814 if (Subtarget.hasDQI()) {
1815 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1816 setOperationAction(ISD::SINT_TO_FP, VT,
1817 Subtarget.hasVLX() ? Legal : Custom);
1818 setOperationAction(ISD::UINT_TO_FP, VT,
1819 Subtarget.hasVLX() ? Legal : Custom);
1820 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1821 Subtarget.hasVLX() ? Legal : Custom);
1822 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1823 Subtarget.hasVLX() ? Legal : Custom);
1824 setOperationAction(ISD::FP_TO_SINT, VT,
1825 Subtarget.hasVLX() ? Legal : Custom);
1826 setOperationAction(ISD::FP_TO_UINT, VT,
1827 Subtarget.hasVLX() ? Legal : Custom);
1828 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1829 Subtarget.hasVLX() ? Legal : Custom);
1830 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1831 Subtarget.hasVLX() ? Legal : Custom);
1832 setOperationAction(ISD::MUL, VT, Legal);
1833 }
1834 }
1835
1836 if (Subtarget.hasCDI()) {
1837 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1838 setOperationAction(ISD::CTLZ, VT, Legal);
1839 }
1840 } // Subtarget.hasCDI()
1841
1842 if (Subtarget.hasVPOPCNTDQ()) {
1843 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1844 setOperationAction(ISD::CTPOP, VT, Legal);
1845 }
1846 }
1847
1848 // This block controls legalization of v32i1/v64i1 which are available with
1849 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1850 // useBWIRegs.
1851 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1852 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1853 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1854
1855 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1856 setOperationAction(ISD::ADD, VT, Custom);
1857 setOperationAction(ISD::SUB, VT, Custom);
1858 setOperationAction(ISD::MUL, VT, Custom);
1859 setOperationAction(ISD::VSELECT, VT, Expand);
1860 setOperationAction(ISD::UADDSAT, VT, Custom);
1861 setOperationAction(ISD::SADDSAT, VT, Custom);
1862 setOperationAction(ISD::USUBSAT, VT, Custom);
1863 setOperationAction(ISD::SSUBSAT, VT, Custom);
1864
1865 setOperationAction(ISD::TRUNCATE, VT, Custom);
1866 setOperationAction(ISD::SETCC, VT, Custom);
1867 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1868 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1869 setOperationAction(ISD::SELECT, VT, Custom);
1870 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1871 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1872 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1873 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1874 }
1875
1876 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1877 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1878
1879 // Extends from v32i1 masks to 256-bit vectors.
1880 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1881 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1882 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1883
1884 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1885 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1886 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1887 }
1888
1889 // These operations are handled on non-VLX by artificially widening in
1890 // isel patterns.
1891 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1892
1893 if (Subtarget.hasBITALG()) {
1894 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1895 setOperationAction(ISD::CTPOP, VT, Legal);
1896 }
1897 }
1898
1899 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1900 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1901 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1902 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1903 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1904 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1905
1906 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1907 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1908 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1909 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1910 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1911
1912 if (Subtarget.hasBWI()) {
1913 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1914 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1915 }
1916
1917 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
1918 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
1919 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1920 }
1921
1922 if (Subtarget.hasAMXTILE()) {
1923 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
1924 }
1925
1926 // We want to custom lower some of our intrinsics.
1927 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1928 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1929 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1930 if (!Subtarget.is64Bit()) {
1931 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1932 }
1933
1934 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1935 // handle type legalization for these operations here.
1936 //
1937 // FIXME: We really should do custom legalization for addition and
1938 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1939 // than generic legalization for 64-bit multiplication-with-overflow, though.
1940 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1941 if (VT == MVT::i64 && !Subtarget.is64Bit())
1942 continue;
1943 // Add/Sub/Mul with overflow operations are custom lowered.
1944 setOperationAction(ISD::SADDO, VT, Custom);
1945 setOperationAction(ISD::UADDO, VT, Custom);
1946 setOperationAction(ISD::SSUBO, VT, Custom);
1947 setOperationAction(ISD::USUBO, VT, Custom);
1948 setOperationAction(ISD::SMULO, VT, Custom);
1949 setOperationAction(ISD::UMULO, VT, Custom);
1950
1951 // Support carry in as value rather than glue.
1952 setOperationAction(ISD::ADDCARRY, VT, Custom);
1953 setOperationAction(ISD::SUBCARRY, VT, Custom);
1954 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1955 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
1956 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
1957 }
1958
1959 if (!Subtarget.is64Bit()) {
1960 // These libcalls are not available in 32-bit.
1961 setLibcallName(RTLIB::SHL_I128, nullptr);
1962 setLibcallName(RTLIB::SRL_I128, nullptr);
1963 setLibcallName(RTLIB::SRA_I128, nullptr);
1964 setLibcallName(RTLIB::MUL_I128, nullptr);
1965 }
1966
1967 // Combine sin / cos into _sincos_stret if it is available.
1968 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1969 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1970 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1971 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1972 }
1973
1974 if (Subtarget.isTargetWin64()) {
1975 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1976 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1977 setOperationAction(ISD::SREM, MVT::i128, Custom);
1978 setOperationAction(ISD::UREM, MVT::i128, Custom);
1979 }
1980
1981 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1982 // is. We should promote the value to 64-bits to solve this.
1983 // This is what the CRT headers do - `fmodf` is an inline header
1984 // function casting to f64 and calling `fmod`.
1985 if (Subtarget.is32Bit() &&
1986 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
1987 for (ISD::NodeType Op :
1988 {ISD::FCEIL, ISD::STRICT_FCEIL,
1989 ISD::FCOS, ISD::STRICT_FCOS,
1990 ISD::FEXP, ISD::STRICT_FEXP,
1991 ISD::FFLOOR, ISD::STRICT_FFLOOR,
1992 ISD::FREM, ISD::STRICT_FREM,
1993 ISD::FLOG, ISD::STRICT_FLOG,
1994 ISD::FLOG10, ISD::STRICT_FLOG10,
1995 ISD::FPOW, ISD::STRICT_FPOW,
1996 ISD::FSIN, ISD::STRICT_FSIN})
1997 if (isOperationExpand(Op, MVT::f32))
1998 setOperationAction(Op, MVT::f32, Promote);
1999
2000 // We have target-specific dag combine patterns for the following nodes:
2001 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
2002 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
2003 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
2004 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
2005 setTargetDAGCombine(ISD::CONCAT_VECTORS);
2006 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
2007 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
2008 setTargetDAGCombine(ISD::BITCAST);
2009 setTargetDAGCombine(ISD::VSELECT);
2010 setTargetDAGCombine(ISD::SELECT);
2011 setTargetDAGCombine(ISD::SHL);
2012 setTargetDAGCombine(ISD::SRA);
2013 setTargetDAGCombine(ISD::SRL);
2014 setTargetDAGCombine(ISD::OR);
2015 setTargetDAGCombine(ISD::AND);
2016 setTargetDAGCombine(ISD::ADD);
2017 setTargetDAGCombine(ISD::FADD);
2018 setTargetDAGCombine(ISD::FSUB);
2019 setTargetDAGCombine(ISD::FNEG);
2020 setTargetDAGCombine(ISD::FMA);
2021 setTargetDAGCombine(ISD::STRICT_FMA);
2022 setTargetDAGCombine(ISD::FMINNUM);
2023 setTargetDAGCombine(ISD::FMAXNUM);
2024 setTargetDAGCombine(ISD::SUB);
2025 setTargetDAGCombine(ISD::LOAD);
2026 setTargetDAGCombine(ISD::MLOAD);
2027 setTargetDAGCombine(ISD::STORE);
2028 setTargetDAGCombine(ISD::MSTORE);
2029 setTargetDAGCombine(ISD::TRUNCATE);
2030 setTargetDAGCombine(ISD::ZERO_EXTEND);
2031 setTargetDAGCombine(ISD::ANY_EXTEND);
2032 setTargetDAGCombine(ISD::SIGN_EXTEND);
2033 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
2034 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
2035 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
2036 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
2037 setTargetDAGCombine(ISD::SINT_TO_FP);
2038 setTargetDAGCombine(ISD::UINT_TO_FP);
2039 setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
2040 setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
2041 setTargetDAGCombine(ISD::SETCC);
2042 setTargetDAGCombine(ISD::MUL);
2043 setTargetDAGCombine(ISD::XOR);
2044 setTargetDAGCombine(ISD::MSCATTER);
2045 setTargetDAGCombine(ISD::MGATHER);
2046 setTargetDAGCombine(ISD::FP16_TO_FP);
2047 setTargetDAGCombine(ISD::FP_EXTEND);
2048 setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
2049 setTargetDAGCombine(ISD::FP_ROUND);
2050
2051 computeRegisterProperties(Subtarget.getRegisterInfo());
2052
2053 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2054 MaxStoresPerMemsetOptSize = 8;
2055 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2056 MaxStoresPerMemcpyOptSize = 4;
2057 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2058 MaxStoresPerMemmoveOptSize = 4;
2059
2060 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2061 // that needs to be benchmarked and balanced with the potential use of vector
2062 // load/store types (PR33329, PR33914).
2063 MaxLoadsPerMemcmp = 2;
2064 MaxLoadsPerMemcmpOptSize = 2;
2065
2066 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
2067 setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
2068
2069 // An out-of-order CPU can speculatively execute past a predictable branch,
2070 // but a conditional move could be stalled by an expensive earlier operation.
2071 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2072 EnableExtLdPromotion = true;
2073 setPrefFunctionAlignment(Align(16));
2074
2075 verifyIntrinsicTables();
2076
2077 // Default to having -disable-strictnode-mutation on
2078 IsStrictFPEnabled = true;
2079}
2080
2081// This has so far only been implemented for 64-bit MachO.
2082bool X86TargetLowering::useLoadStackGuardNode() const {
2083 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2084}
2085
2086bool X86TargetLowering::useStackGuardXorFP() const {
2087 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2088 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2089}
2090
2091SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2092 const SDLoc &DL) const {
2093 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2094 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2095 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2096 return SDValue(Node, 0);
2097}
2098
2099TargetLoweringBase::LegalizeTypeAction
2100X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2101 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2102 !Subtarget.hasBWI())
2103 return TypeSplitVector;
2104
2105 if (VT.getVectorNumElements() != 1 &&
2106 VT.getVectorElementType() != MVT::i1)
2107 return TypeWidenVector;
2108
2109 return TargetLoweringBase::getPreferredVectorAction(VT);
2110}
2111
2112static std::pair<MVT, unsigned>
2113handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2114 const X86Subtarget &Subtarget) {
2115 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2116 // convention is one that uses k registers.
2117 if (NumElts == 2)
2118 return {MVT::v2i64, 1};
2119 if (NumElts == 4)
2120 return {MVT::v4i32, 1};
2121 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2122 CC != CallingConv::Intel_OCL_BI)
2123 return {MVT::v8i16, 1};
2124 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2125 CC != CallingConv::Intel_OCL_BI)
2126 return {MVT::v16i8, 1};
2127 // v32i1 passes in ymm unless we have BWI and the calling convention is
2128 // regcall.
2129 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2130 return {MVT::v32i8, 1};
2131 // Split v64i1 vectors if we don't have v64i8 available.
2132 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2133 if (Subtarget.useAVX512Regs())
2134 return {MVT::v64i8, 1};
2135 return {MVT::v32i8, 2};
2136 }
2137
2138 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2139 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2140 NumElts > 64)
2141 return {MVT::i8, NumElts};
2142
2143 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2144}
2145
2146MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2147 CallingConv::ID CC,
2148 EVT VT) const {
2149 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2150 Subtarget.hasAVX512()) {
2151 unsigned NumElts = VT.getVectorNumElements();
2152
2153 MVT RegisterVT;
2154 unsigned NumRegisters;
2155 std::tie(RegisterVT, NumRegisters) =
2156 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2157 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2158 return RegisterVT;
2159 }
2160
2161 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2162}
2163
2164unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2165 CallingConv::ID CC,
2166 EVT VT) const {
2167 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2168 Subtarget.hasAVX512()) {
2169 unsigned NumElts = VT.getVectorNumElements();
2170
2171 MVT RegisterVT;
2172 unsigned NumRegisters;
2173 std::tie(RegisterVT, NumRegisters) =
2174 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2175 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2176 return NumRegisters;
2177 }
2178
2179 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2180}
2181
2182unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2183 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2184 unsigned &NumIntermediates, MVT &RegisterVT) const {
2185 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2186 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2187 Subtarget.hasAVX512() &&
2188 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2189 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2190 VT.getVectorNumElements() > 64)) {
2191 RegisterVT = MVT::i8;
2192 IntermediateVT = MVT::i1;
2193 NumIntermediates = VT.getVectorNumElements();
2194 return NumIntermediates;
2195 }
2196
2197 // Split v64i1 vectors if we don't have v64i8 available.
2198 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2199 CC != CallingConv::X86_RegCall) {
2200 RegisterVT = MVT::v32i8;
2201 IntermediateVT = MVT::v32i1;
2202 NumIntermediates = 2;
2203 return 2;
2204 }
2205
2206 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2207 NumIntermediates, RegisterVT);
2208}
2209
2210EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2211 LLVMContext& Context,
2212 EVT VT) const {
2213 if (!VT.isVector())
2214 return MVT::i8;
2215
2216 if (Subtarget.hasAVX512()) {
2217 const unsigned NumElts = VT.getVectorNumElements();
2218
2219 // Figure out what this type will be legalized to.
2220 EVT LegalVT = VT;
2221 while (getTypeAction(Context, LegalVT) != TypeLegal)
2222 LegalVT = getTypeToTransformTo(Context, LegalVT);
2223
2224 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2225 if (LegalVT.getSimpleVT().is512BitVector())
2226 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2227
2228 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2229 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2230 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2231 // vXi16/vXi8.
2232 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2233 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2234 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2235 }
2236 }
2237
2238 return VT.changeVectorElementTypeToInteger();
2239}
2240
2241/// Helper for getByValTypeAlignment to determine
2242/// the desired ByVal argument alignment.
2243static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2244 if (MaxAlign == 16)
2245 return;
2246 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2247 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2248 MaxAlign = Align(16);
2249 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2250 Align EltAlign;
2251 getMaxByValAlign(ATy->getElementType(), EltAlign);
2252 if (EltAlign > MaxAlign)
2253 MaxAlign = EltAlign;
2254 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2255 for (auto *EltTy : STy->elements()) {
2256 Align EltAlign;
2257 getMaxByValAlign(EltTy, EltAlign);
2258 if (EltAlign > MaxAlign)
2259 MaxAlign = EltAlign;
2260 if (MaxAlign == 16)
2261 break;
2262 }
2263 }
2264}
2265
2266/// Return the desired alignment for ByVal aggregate
2267/// function arguments in the caller parameter area. For X86, aggregates
2268/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2269/// are at 4-byte boundaries.
2270unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2271 const DataLayout &DL) const {
2272 if (Subtarget.is64Bit()) {
2273 // Max of 8 and alignment of type.
2274 Align TyAlign = DL.getABITypeAlign(Ty);
2275 if (TyAlign > 8)
2276 return TyAlign.value();
2277 return 8;
2278 }
2279
2280 Align Alignment(4);
2281 if (Subtarget.hasSSE1())
2282 getMaxByValAlign(Ty, Alignment);
2283 return Alignment.value();
2284}
2285
2286/// It returns EVT::Other if the type should be determined using generic
2287/// target-independent logic.
2288/// For vector ops we check that the overall size isn't larger than our
2289/// preferred vector width.
2290EVT X86TargetLowering::getOptimalMemOpType(
2291 const MemOp &Op, const AttributeList &FuncAttributes) const {
2292 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2293 if (Op.size() >= 16 &&
2294 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2295 // FIXME: Check if unaligned 64-byte accesses are slow.
2296 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2297 (Subtarget.getPreferVectorWidth() >= 512)) {
2298 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2299 }
2300 // FIXME: Check if unaligned 32-byte accesses are slow.
2301 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2302 (Subtarget.getPreferVectorWidth() >= 256)) {
2303 // Although this isn't a well-supported type for AVX1, we'll let
2304 // legalization and shuffle lowering produce the optimal codegen. If we
2305 // choose an optimal type with a vector element larger than a byte,
2306 // getMemsetStores() may create an intermediate splat (using an integer
2307 // multiply) before we splat as a vector.
2308 return MVT::v32i8;
2309 }
2310 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2311 return MVT::v16i8;
2312 // TODO: Can SSE1 handle a byte vector?
2313 // If we have SSE1 registers we should be able to use them.
2314 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2315 (Subtarget.getPreferVectorWidth() >= 128))
2316 return MVT::v4f32;
2317 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2318 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2319 // Do not use f64 to lower memcpy if source is string constant. It's
2320 // better to use i32 to avoid the loads.
2321 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2322 // The gymnastics of splatting a byte value into an XMM register and then
2323 // only using 8-byte stores (because this is a CPU with slow unaligned
2324 // 16-byte accesses) makes that a loser.
2325 return MVT::f64;
2326 }
2327 }
2328 // This is a compromise. If we reach here, unaligned accesses may be slow on
2329 // this target. However, creating smaller, aligned accesses could be even
2330 // slower and would certainly be a lot more code.
2331 if (Subtarget.is64Bit() && Op.size() >= 8)
2332 return MVT::i64;
2333 return MVT::i32;
2334}
2335
2336bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2337 if (VT == MVT::f32)
2338 return X86ScalarSSEf32;
2339 else if (VT == MVT::f64)
2340 return X86ScalarSSEf64;
2341 return true;
2342}
2343
2344bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2345 EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags,
2346 bool *Fast) const {
2347 if (Fast) {
2348 switch (VT.getSizeInBits()) {
2349 default:
2350 // 8-byte and under are always assumed to be fast.
2351 *Fast = true;
2352 break;
2353 case 128:
2354 *Fast = !Subtarget.isUnalignedMem16Slow();
2355 break;
2356 case 256:
2357 *Fast = !Subtarget.isUnalignedMem32Slow();
2358 break;
2359 // TODO: What about AVX-512 (512-bit) accesses?
2360 }
2361 }
2362 // NonTemporal vector memory ops must be aligned.
2363 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2364 // NT loads can only be vector aligned, so if its less aligned than the
2365 // minimum vector size (which we can split the vector down to), we might as
2366 // well use a regular unaligned vector load.
2367 // We don't have any NT loads pre-SSE41.
2368 if (!!(Flags & MachineMemOperand::MOLoad))
2369 return (Align < 16 || !Subtarget.hasSSE41());
2370 return false;
2371 }
2372 // Misaligned accesses of any size are always allowed.
2373 return true;
2374}
2375
2376/// Return the entry encoding for a jump table in the
2377/// current function. The returned value is a member of the
2378/// MachineJumpTableInfo::JTEntryKind enum.
2379unsigned X86TargetLowering::getJumpTableEncoding() const {
2380 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2381 // symbol.
2382 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2383 return MachineJumpTableInfo::EK_Custom32;
2384
2385 // Otherwise, use the normal jump table encoding heuristics.
2386 return TargetLowering::getJumpTableEncoding();
2387}
2388
2389bool X86TargetLowering::useSoftFloat() const {
2390 return Subtarget.useSoftFloat();
2391}
2392
2393void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2394 ArgListTy &Args) const {
2395
2396 // Only relabel X86-32 for C / Stdcall CCs.
2397 if (Subtarget.is64Bit())
2398 return;
2399 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2400 return;
2401 unsigned ParamRegs = 0;
2402 if (auto *M = MF->getFunction().getParent())
2403 ParamRegs = M->getNumberRegisterParameters();
2404
2405 // Mark the first N int arguments as having reg
2406 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2407 Type *T = Args[Idx].Ty;
2408 if (T->isIntOrPtrTy())
2409 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2410 unsigned numRegs = 1;
2411 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2412 numRegs = 2;
2413 if (ParamRegs < numRegs)
2414 return;
2415 ParamRegs -= numRegs;
2416 Args[Idx].IsInReg = true;
2417 }
2418 }
2419}
2420
2421const MCExpr *
2422X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2423 const MachineBasicBlock *MBB,
2424 unsigned uid,MCContext &Ctx) const{
2425 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2425, __PRETTY_FUNCTION__))
;
2426 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2427 // entries.
2428 return MCSymbolRefExpr::create(MBB->getSymbol(),
2429 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2430}
2431
2432/// Returns relocation base for the given PIC jumptable.
2433SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2434 SelectionDAG &DAG) const {
2435 if (!Subtarget.is64Bit())
2436 // This doesn't have SDLoc associated with it, but is not really the
2437 // same as a Register.
2438 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2439 getPointerTy(DAG.getDataLayout()));
2440 return Table;
2441}
2442
2443/// This returns the relocation base for the given PIC jumptable,
2444/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2445const MCExpr *X86TargetLowering::
2446getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2447 MCContext &Ctx) const {
2448 // X86-64 uses RIP relative addressing based on the jump table label.
2449 if (Subtarget.isPICStyleRIPRel())
2450 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2451
2452 // Otherwise, the reference is relative to the PIC base.
2453 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2454}
2455
2456std::pair<const TargetRegisterClass *, uint8_t>
2457X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2458 MVT VT) const {
2459 const TargetRegisterClass *RRC = nullptr;
2460 uint8_t Cost = 1;
2461 switch (VT.SimpleTy) {
2462 default:
2463 return TargetLowering::findRepresentativeClass(TRI, VT);
2464 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2465 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2466 break;
2467 case MVT::x86mmx:
2468 RRC = &X86::VR64RegClass;
2469 break;
2470 case MVT::f32: case MVT::f64:
2471 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2472 case MVT::v4f32: case MVT::v2f64:
2473 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2474 case MVT::v8f32: case MVT::v4f64:
2475 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2476 case MVT::v16f32: case MVT::v8f64:
2477 RRC = &X86::VR128XRegClass;
2478 break;
2479 }
2480 return std::make_pair(RRC, Cost);
2481}
2482
2483unsigned X86TargetLowering::getAddressSpace() const {
2484 if (Subtarget.is64Bit())
2485 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2486 return 256;
2487}
2488
2489static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2490 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2491 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2492}
2493
2494static Constant* SegmentOffset(IRBuilder<> &IRB,
2495 unsigned Offset, unsigned AddressSpace) {
2496 return ConstantExpr::getIntToPtr(
2497 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2498 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2499}
2500
2501Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2502 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2503 // tcbhead_t; use it instead of the usual global variable (see
2504 // sysdeps/{i386,x86_64}/nptl/tls.h)
2505 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2506 if (Subtarget.isTargetFuchsia()) {
2507 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2508 return SegmentOffset(IRB, 0x10, getAddressSpace());
2509 } else {
2510 unsigned AddressSpace = getAddressSpace();
2511 // Specially, some users may customize the base reg and offset.
2512 unsigned Offset = getTargetMachine().Options.StackProtectorGuardOffset;
2513 // If we don't set -stack-protector-guard-offset value:
2514 // %fs:0x28, unless we're using a Kernel code model, in which case
2515 // it's %gs:0x28. gs:0x14 on i386.
2516 if (Offset == (unsigned)-1)
2517 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2518
2519 const auto &GuardReg = getTargetMachine().Options.StackProtectorGuardReg;
2520 if (GuardReg == "fs")
2521 AddressSpace = X86AS::FS;
2522 else if (GuardReg == "gs")
2523 AddressSpace = X86AS::GS;
2524 return SegmentOffset(IRB, Offset, AddressSpace);
2525 }
2526 }
2527 return TargetLowering::getIRStackGuard(IRB);
2528}
2529
2530void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2531 // MSVC CRT provides functionalities for stack protection.
2532 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2533 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2534 // MSVC CRT has a global variable holding security cookie.
2535 M.getOrInsertGlobal("__security_cookie",
2536 Type::getInt8PtrTy(M.getContext()));
2537
2538 // MSVC CRT has a function to validate security cookie.
2539 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2540 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2541 Type::getInt8PtrTy(M.getContext()));
2542 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2543 F->setCallingConv(CallingConv::X86_FastCall);
2544 F->addAttribute(1, Attribute::AttrKind::InReg);
2545 }
2546 return;
2547 }
2548
2549 auto GuardMode = getTargetMachine().Options.StackProtectorGuard;
2550
2551 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2552 if ((GuardMode == llvm::StackProtectorGuards::TLS ||
2553 GuardMode == llvm::StackProtectorGuards::None)
2554 && hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2555 return;
2556 TargetLowering::insertSSPDeclarations(M);
2557}
2558
2559Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2560 // MSVC CRT has a global variable holding security cookie.
2561 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2562 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2563 return M.getGlobalVariable("__security_cookie");
2564 }
2565 return TargetLowering::getSDagStackGuard(M);
2566}
2567
2568Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2569 // MSVC CRT has a function to validate security cookie.
2570 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2571 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2572 return M.getFunction("__security_check_cookie");
2573 }
2574 return TargetLowering::getSSPStackGuardCheck(M);
2575}
2576
2577Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2578 if (Subtarget.getTargetTriple().isOSContiki())
2579 return getDefaultSafeStackPointerLocation(IRB, false);
2580
2581 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2582 // definition of TLS_SLOT_SAFESTACK in
2583 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2584 if (Subtarget.isTargetAndroid()) {
2585 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2586 // %gs:0x24 on i386
2587 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2588 return SegmentOffset(IRB, Offset, getAddressSpace());
2589 }
2590
2591 // Fuchsia is similar.
2592 if (Subtarget.isTargetFuchsia()) {
2593 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2594 return SegmentOffset(IRB, 0x18, getAddressSpace());
2595 }
2596
2597 return TargetLowering::getSafeStackPointerLocation(IRB);
2598}
2599
2600//===----------------------------------------------------------------------===//
2601// Return Value Calling Convention Implementation
2602//===----------------------------------------------------------------------===//
2603
2604bool X86TargetLowering::CanLowerReturn(
2605 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2606 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2607 SmallVector<CCValAssign, 16> RVLocs;
2608 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2609 return CCInfo.CheckReturn(Outs, RetCC_X86);
2610}
2611
2612const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2613 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2614 return ScratchRegs;
2615}
2616
2617/// Lowers masks values (v*i1) to the local register values
2618/// \returns DAG node after lowering to register type
2619static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2620 const SDLoc &Dl, SelectionDAG &DAG) {
2621 EVT ValVT = ValArg.getValueType();
2622
2623 if (ValVT == MVT::v1i1)
2624 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2625 DAG.getIntPtrConstant(0, Dl));
2626
2627 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2628 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2629 // Two stage lowering might be required
2630 // bitcast: v8i1 -> i8 / v16i1 -> i16
2631 // anyextend: i8 -> i32 / i16 -> i32
2632 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2633 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2634 if (ValLoc == MVT::i32)
2635 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2636 return ValToCopy;
2637 }
2638
2639 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2640 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2641 // One stage lowering is required
2642 // bitcast: v32i1 -> i32 / v64i1 -> i64
2643 return DAG.getBitcast(ValLoc, ValArg);
2644 }
2645
2646 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2647}
2648
2649/// Breaks v64i1 value into two registers and adds the new node to the DAG
2650static void Passv64i1ArgInRegs(
2651 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2652 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
2653 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2654 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")((Subtarget.hasBWI() && "Expected AVX512BW target!") ?
static_cast<void> (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2654, __PRETTY_FUNCTION__))
;
2655 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2655, __PRETTY_FUNCTION__))
;
2656 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2656, __PRETTY_FUNCTION__))
;
2657 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2658, __PRETTY_FUNCTION__))
2658 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2658, __PRETTY_FUNCTION__))
;
2659
2660 // Before splitting the value we cast it to i64
2661 Arg = DAG.getBitcast(MVT::i64, Arg);
2662
2663 // Splitting the value into two i32 types
2664 SDValue Lo, Hi;
2665 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2666 DAG.getConstant(0, Dl, MVT::i32));
2667 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2668 DAG.getConstant(1, Dl, MVT::i32));
2669
2670 // Attach the two i32 types into corresponding registers
2671 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2672 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2673}
2674
2675SDValue
2676X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2677 bool isVarArg,
2678 const SmallVectorImpl<ISD::OutputArg> &Outs,
2679 const SmallVectorImpl<SDValue> &OutVals,
2680 const SDLoc &dl, SelectionDAG &DAG) const {
2681 MachineFunction &MF = DAG.getMachineFunction();
2682 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2683
2684 // In some cases we need to disable registers from the default CSR list.
2685 // For example, when they are used for argument passing.
2686 bool ShouldDisableCalleeSavedRegister =
2687 CallConv == CallingConv::X86_RegCall ||
2688 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2689
2690 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2691 report_fatal_error("X86 interrupts may not return any value");
2692
2693 SmallVector<CCValAssign, 16> RVLocs;
2694 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2695 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2696
2697 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
2698 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2699 ++I, ++OutsIndex) {
2700 CCValAssign &VA = RVLocs[I];
2701 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2701, __PRETTY_FUNCTION__))
;
2702
2703 // Add the register to the CalleeSaveDisableRegs list.
2704 if (ShouldDisableCalleeSavedRegister)
2705 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2706
2707 SDValue ValToCopy = OutVals[OutsIndex];
2708 EVT ValVT = ValToCopy.getValueType();
2709
2710 // Promote values to the appropriate types.
2711 if (VA.getLocInfo() == CCValAssign::SExt)
2712 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2713 else if (VA.getLocInfo() == CCValAssign::ZExt)
2714 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2715 else if (VA.getLocInfo() == CCValAssign::AExt) {
2716 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2717 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2718 else
2719 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2720 }
2721 else if (VA.getLocInfo() == CCValAssign::BCvt)
2722 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2723
2724 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2725, __PRETTY_FUNCTION__))
2725 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2725, __PRETTY_FUNCTION__))
;
2726
2727 // Report an error if we have attempted to return a value via an XMM
2728 // register and SSE was disabled.
2729 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
2730 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2731 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2732 } else if (!Subtarget.hasSSE2() &&
2733 X86::FR64XRegClass.contains(VA.getLocReg()) &&
2734 ValVT == MVT::f64) {
2735 // When returning a double via an XMM register, report an error if SSE2 is
2736 // not enabled.
2737 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2738 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2739 }
2740
2741 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2742 // the RET instruction and handled by the FP Stackifier.
2743 if (VA.getLocReg() == X86::FP0 ||
2744 VA.getLocReg() == X86::FP1) {
2745 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2746 // change the value to the FP stack register class.
2747 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2748 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2749 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2750 // Don't emit a copytoreg.
2751 continue;
2752 }
2753
2754 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2755 // which is returned in RAX / RDX.
2756 if (Subtarget.is64Bit()) {
2757 if (ValVT == MVT::x86mmx) {
2758 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2759 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2760 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2761 ValToCopy);
2762 // If we don't have SSE2 available, convert to v4f32 so the generated
2763 // register is legal.
2764 if (!Subtarget.hasSSE2())
2765 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2766 }
2767 }
2768 }
2769
2770 if (VA.needsCustom()) {
2771 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2772, __PRETTY_FUNCTION__))
2772 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2772, __PRETTY_FUNCTION__))
;
2773
2774 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
2775 Subtarget);
2776
2777 // Add the second register to the CalleeSaveDisableRegs list.
2778 if (ShouldDisableCalleeSavedRegister)
2779 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2780 } else {
2781 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2782 }
2783 }
2784
2785 SDValue Flag;
2786 SmallVector<SDValue, 6> RetOps;
2787 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2788 // Operand #1 = Bytes To Pop
2789 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2790 MVT::i32));
2791
2792 // Copy the result values into the output registers.
2793 for (auto &RetVal : RetVals) {
2794 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
2795 RetOps.push_back(RetVal.second);
2796 continue; // Don't emit a copytoreg.
2797 }
2798
2799 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
2800 Flag = Chain.getValue(1);
2801 RetOps.push_back(
2802 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
2803 }
2804
2805 // Swift calling convention does not require we copy the sret argument
2806 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2807
2808 // All x86 ABIs require that for returning structs by value we copy
2809 // the sret argument into %rax/%eax (depending on ABI) for the return.
2810 // We saved the argument into a virtual register in the entry block,
2811 // so now we copy the value out and into %rax/%eax.
2812 //
2813 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2814 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2815 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2816 // either case FuncInfo->setSRetReturnReg() will have been called.
2817 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
2818 // When we have both sret and another return value, we should use the
2819 // original Chain stored in RetOps[0], instead of the current Chain updated
2820 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2821
2822 // For the case of sret and another return value, we have
2823 // Chain_0 at the function entry
2824 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2825 // If we use Chain_1 in getCopyFromReg, we will have
2826 // Val = getCopyFromReg(Chain_1)
2827 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2828
2829 // getCopyToReg(Chain_0) will be glued together with
2830 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2831 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2832 // Data dependency from Unit B to Unit A due to usage of Val in
2833 // getCopyToReg(Chain_1, Val)
2834 // Chain dependency from Unit A to Unit B
2835
2836 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2837 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2838 getPointerTy(MF.getDataLayout()));
2839
2840 Register RetValReg
2841 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2842 X86::RAX : X86::EAX;
2843 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2844 Flag = Chain.getValue(1);
2845
2846 // RAX/EAX now acts like a return value.
2847 RetOps.push_back(
2848 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2849
2850 // Add the returned register to the CalleeSaveDisableRegs list.
2851 if (ShouldDisableCalleeSavedRegister)
2852 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2853 }
2854
2855 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2856 const MCPhysReg *I =
2857 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2858 if (I) {
2859 for (; *I; ++I) {
2860 if (X86::GR64RegClass.contains(*I))
2861 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2862 else
2863 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2863)
;
2864 }
2865 }
2866
2867 RetOps[0] = Chain; // Update chain.
2868
2869 // Add the flag if we have it.
2870 if (Flag.getNode())
2871 RetOps.push_back(Flag);
2872
2873 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2874 if (CallConv == CallingConv::X86_INTR)
2875 opcode = X86ISD::IRET;
2876 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2877}
2878
2879bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2880 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2881 return false;
2882
2883 SDValue TCChain = Chain;
2884 SDNode *Copy = *N->use_begin();
2885 if (Copy->getOpcode() == ISD::CopyToReg) {
2886 // If the copy has a glue operand, we conservatively assume it isn't safe to
2887 // perform a tail call.
2888 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2889 return false;
2890 TCChain = Copy->getOperand(0);
2891 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2892 return false;
2893
2894 bool HasRet = false;
2895 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2896 UI != UE; ++UI) {
2897 if (UI->getOpcode() != X86ISD::RET_FLAG)
2898 return false;
2899 // If we are returning more than one value, we can definitely
2900 // not make a tail call see PR19530
2901 if (UI->getNumOperands() > 4)
2902 return false;
2903 if (UI->getNumOperands() == 4 &&
2904 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2905 return false;
2906 HasRet = true;
2907 }
2908
2909 if (!HasRet)
2910 return false;
2911
2912 Chain = TCChain;
2913 return true;
2914}
2915
2916EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2917 ISD::NodeType ExtendKind) const {
2918 MVT ReturnMVT = MVT::i32;
2919
2920 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2921 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2922 // The ABI does not require i1, i8 or i16 to be extended.
2923 //
2924 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2925 // always extending i8/i16 return values, so keep doing that for now.
2926 // (PR26665).
2927 ReturnMVT = MVT::i8;
2928 }
2929
2930 EVT MinVT = getRegisterType(Context, ReturnMVT);
2931 return VT.bitsLT(MinVT) ? MinVT : VT;
2932}
2933
2934/// Reads two 32 bit registers and creates a 64 bit mask value.
2935/// \param VA The current 32 bit value that need to be assigned.
2936/// \param NextVA The next 32 bit value that need to be assigned.
2937/// \param Root The parent DAG node.
2938/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2939/// glue purposes. In the case the DAG is already using
2940/// physical register instead of virtual, we should glue
2941/// our new SDValue to InFlag SDvalue.
2942/// \return a new SDvalue of size 64bit.
2943static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2944 SDValue &Root, SelectionDAG &DAG,
2945 const SDLoc &Dl, const X86Subtarget &Subtarget,
2946 SDValue *InFlag = nullptr) {
2947 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2947, __PRETTY_FUNCTION__))
;
2948 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2948, __PRETTY_FUNCTION__))
;
2949 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2950, __PRETTY_FUNCTION__))
2950 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2950, __PRETTY_FUNCTION__))
;
2951 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2952, __PRETTY_FUNCTION__))
2952 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2952, __PRETTY_FUNCTION__))
;
2953 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2954, __PRETTY_FUNCTION__))
2954 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2954, __PRETTY_FUNCTION__))
;
2955
2956 SDValue Lo, Hi;
2957 SDValue ArgValueLo, ArgValueHi;
2958
2959 MachineFunction &MF = DAG.getMachineFunction();
2960 const TargetRegisterClass *RC = &X86::GR32RegClass;
2961
2962 // Read a 32 bit value from the registers.
2963 if (nullptr == InFlag) {
2964 // When no physical register is present,
2965 // create an intermediate virtual register.
2966 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
2967 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2968 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2969 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2970 } else {
2971 // When a physical register is available read the value from it and glue
2972 // the reads together.
2973 ArgValueLo =
2974 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2975 *InFlag = ArgValueLo.getValue(2);
2976 ArgValueHi =
2977 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2978 *InFlag = ArgValueHi.getValue(2);
2979 }
2980
2981 // Convert the i32 type into v32i1 type.
2982 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2983
2984 // Convert the i32 type into v32i1 type.
2985 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2986
2987 // Concatenate the two values together.
2988 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2989}
2990
2991/// The function will lower a register of various sizes (8/16/32/64)
2992/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2993/// \returns a DAG node contains the operand after lowering to mask type.
2994static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2995 const EVT &ValLoc, const SDLoc &Dl,
2996 SelectionDAG &DAG) {
2997 SDValue ValReturned = ValArg;
2998
2999 if (ValVT == MVT::v1i1)
3000 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3001
3002 if (ValVT == MVT::v64i1) {
3003 // In 32 bit machine, this case is handled by getv64i1Argument
3004 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3004, __PRETTY_FUNCTION__))
;
3005 // In 64 bit machine, There is no need to truncate the value only bitcast
3006 } else {
3007 MVT maskLen;
3008 switch (ValVT.getSimpleVT().SimpleTy) {
3009 case MVT::v8i1:
3010 maskLen = MVT::i8;
3011 break;
3012 case MVT::v16i1:
3013 maskLen = MVT::i16;
3014 break;
3015 case MVT::v32i1:
3016 maskLen = MVT::i32;
3017 break;
3018 default:
3019 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3019)
;
3020 }
3021
3022 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3023 }
3024 return DAG.getBitcast(ValVT, ValReturned);
3025}
3026
3027/// Lower the result values of a call into the
3028/// appropriate copies out of appropriate physical registers.
3029///
3030SDValue X86TargetLowering::LowerCallResult(
3031 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3032 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3033 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3034 uint32_t *RegMask) const {
3035
3036 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3037 // Assign locations to each value returned by this call.
3038 SmallVector<CCValAssign, 16> RVLocs;
3039 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3040 *DAG.getContext());
3041 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3042
3043 // Copy all of the result registers out of their specified physreg.
3044 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3045 ++I, ++InsIndex) {
3046 CCValAssign &VA = RVLocs[I];
3047 EVT CopyVT = VA.getLocVT();
3048
3049 // In some calling conventions we need to remove the used registers
3050 // from the register mask.
3051 if (RegMask) {
3052 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3053 SubRegs.isValid(); ++SubRegs)
3054 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3055 }
3056
3057 // Report an error if there was an attempt to return FP values via XMM
3058 // registers.
3059 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3060 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3061 if (VA.getLocReg() == X86::XMM1)
3062 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3063 else
3064 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3065 } else if (!Subtarget.hasSSE2() &&
3066 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3067 CopyVT == MVT::f64) {
3068 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3069 if (VA.getLocReg() == X86::XMM1)
3070 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3071 else
3072 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3073 }
3074
3075 // If we prefer to use the value in xmm registers, copy it out as f80 and
3076 // use a truncate to move it from fp stack reg to xmm reg.
3077 bool RoundAfterCopy = false;
3078 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3079 isScalarFPTypeInSSEReg(VA.getValVT())) {
3080 if (!Subtarget.hasX87())
3081 report_fatal_error("X87 register return with X87 disabled");
3082 CopyVT = MVT::f80;
3083 RoundAfterCopy = (CopyVT != VA.getLocVT());
3084 }
3085
3086 SDValue Val;
3087 if (VA.needsCustom()) {
3088 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3089, __PRETTY_FUNCTION__))
3089 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3089, __PRETTY_FUNCTION__))
;
3090 Val =
3091 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3092 } else {
3093 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3094 .getValue(1);
3095 Val = Chain.getValue(0);
3096 InFlag = Chain.getValue(2);
3097 }
3098
3099 if (RoundAfterCopy)
3100 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3101 // This truncation won't change the value.
3102 DAG.getIntPtrConstant(1, dl));
3103
3104 if (VA.isExtInLoc()) {
3105 if (VA.getValVT().isVector() &&
3106 VA.getValVT().getScalarType() == MVT::i1 &&
3107 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3108 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3109 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3110 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3111 } else
3112 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3113 }
3114
3115 if (VA.getLocInfo() == CCValAssign::BCvt)
3116 Val = DAG.getBitcast(VA.getValVT(), Val);
3117
3118 InVals.push_back(Val);
3119 }
3120
3121 return Chain;
3122}
3123
3124//===----------------------------------------------------------------------===//
3125// C & StdCall & Fast Calling Convention implementation
3126//===----------------------------------------------------------------------===//
3127// The StdCall calling convention appears to be the standard for many Windows
3128// API routines. It differs from the C calling convention only slightly: the
3129// callee cleans up the stack, not the caller. Symbols are also expected to be
3130// decorated in a special way. StdCall doesn't support any vector arguments.
3131// For info on the fast calling convention, see the Fast Calling Convention
3132// (tail call) implementation, LowerX86_32FastCCCallTo.
3133
3134/// Classification of struct-return (sret) semantics for a call or a
3135/// function, as produced by callIsStructReturn and argsAreStructReturn.
3136enum StructReturnType {
// There are no arguments, or the first argument is not flagged sret.
3137 NotStructReturn,
// The first argument is flagged sret and is either flagged inreg or the
// caller passed IsMCU == true.
3138 RegStructReturn,
// The first argument is flagged sret and neither condition above holds.
3139 StackStructReturn
3140};
3141static StructReturnType
3142callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
3143 if (Outs.empty())
3144 return NotStructReturn;
3145
3146 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
3147 if (!Flags.isSRet())
3148 return NotStructReturn;
3149 if (Flags.isInReg() || IsMCU)
3150 return RegStructReturn;
3151 return StackStructReturn;
3152}
3153
3154/// Determines whether a function uses struct return semantics.
3155static StructReturnType
3156argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
3157 if (Ins.empty())
3158 return NotStructReturn;
3159
3160 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
3161 if (!Flags.isSRet())
3162 return NotStructReturn;
3163 if (Flags.isInReg() || IsMCU)
3164 return RegStructReturn;
3165 return StackStructReturn;
3166}
3167
3168/// Make a copy of an aggregate at address specified by "Src" to address
3169/// "Dst" with size and alignment information specified by the specific
3170/// parameter attribute. The copy will be passed as a byval function parameter.
3171static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3172 SDValue Chain, ISD::ArgFlagsTy Flags,
3173 SelectionDAG &DAG, const SDLoc &dl) {
3174 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3175
3176 return DAG.getMemcpy(
3177 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3178 /*isVolatile*/ false, /*AlwaysInline=*/true,
3179 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3180}
3181
3182/// Return true if the calling convention is one that we can guarantee TCO for.
3183static bool canGuaranteeTCO(CallingConv::ID CC) {
3184 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3185 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3186 CC == CallingConv::HHVM || CC == CallingConv::Tail);
3187}
3188
3189/// Return true if we might ever do TCO for calls with this calling convention.
3190static bool mayTailCallThisCC(CallingConv::ID CC) {
3191 switch (CC) {
3192 // C calling conventions:
3193 case CallingConv::C:
3194 case CallingConv::Win64:
3195 case CallingConv::X86_64_SysV:
3196 // Callee pop conventions:
3197 case CallingConv::X86_ThisCall:
3198 case CallingConv::X86_StdCall:
3199 case CallingConv::X86_VectorCall:
3200 case CallingConv::X86_FastCall:
3201 // Swift:
3202 case CallingConv::Swift:
3203 return true;
3204 default:
3205 return canGuaranteeTCO(CC);
3206 }
3207}
3208
3209/// Return true if the function is being made into a tailcall target by
3210/// changing its ABI.
3211static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3212 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
3213}
3214
3215bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3216 if (!CI->isTailCall())
3217 return false;
3218
3219 CallingConv::ID CalleeCC = CI->getCallingConv();
3220 if (!mayTailCallThisCC(CalleeCC))
3221 return false;
3222
3223 return true;
3224}
3225
3226SDValue
3227X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3228 const SmallVectorImpl<ISD::InputArg> &Ins,
3229 const SDLoc &dl, SelectionDAG &DAG,
3230 const CCValAssign &VA,
3231 MachineFrameInfo &MFI, unsigned i) const {
3232 // Create the nodes corresponding to a load from this parameter slot.
3233 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3234 bool AlwaysUseMutable = shouldGuaranteeTCO(
3235 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3236 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3237 EVT ValVT;
3238 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3239
3240 // If value is passed by pointer we have address passed instead of the value
3241 // itself. No need to extend if the mask value and location share the same
3242 // absolute size.
3243 bool ExtendedInMem =
3244 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3245 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3246
3247 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3248 ValVT = VA.getLocVT();
3249 else
3250 ValVT = VA.getValVT();
3251
3252 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3253 // changed with more analysis.
3254 // In case of tail call optimization mark all arguments mutable. Since they
3255 // could be overwritten by lowering of arguments in case of a tail call.
3256 if (Flags.isByVal()) {
3257 unsigned Bytes = Flags.getByValSize();
3258 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3259
3260 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3261 // can be improved with deeper analysis.
3262 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3263 /*isAliased=*/true);
3264 return DAG.getFrameIndex(FI, PtrVT);
3265 }
3266
3267 EVT ArgVT = Ins[i].ArgVT;
3268
3269 // If this is a vector that has been split into multiple parts, and the
3270 // scalar size of the parts don't match the vector element size, then we can't
3271 // elide the copy. The parts will have padding between them instead of being
3272 // packed like a vector.
3273 bool ScalarizedAndExtendedVector =
3274 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3275 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3276
3277 // This is an argument in memory. We might be able to perform copy elision.
3278 // If the argument is passed directly in memory without any extension, then we
3279 // can perform copy elision. Large vector types, for example, may be passed
3280 // indirectly by pointer.
3281 if (Flags.isCopyElisionCandidate() &&
3282 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3283 !ScalarizedAndExtendedVector) {
3284 SDValue PartAddr;
3285 if (Ins[i].PartOffset == 0) {
3286 // If this is a one-part value or the first part of a multi-part value,
3287 // create a stack object for the entire argument value type and return a
3288 // load from our portion of it. This assumes that if the first part of an
3289 // argument is in memory, the rest will also be in memory.
3290 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3291 /*IsImmutable=*/false);
3292 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3293 return DAG.getLoad(
3294 ValVT, dl, Chain, PartAddr,
3295 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3296 } else {
3297 // This is not the first piece of an argument in memory. See if there is
3298 // already a fixed stack object including this offset. If so, assume it
3299 // was created by the PartOffset == 0 branch above and create a load from
3300 // the appropriate offset into it.
3301 int64_t PartBegin = VA.getLocMemOffset();
3302 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3303 int FI = MFI.getObjectIndexBegin();
3304 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3305 int64_t ObjBegin = MFI.getObjectOffset(FI);
3306 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3307 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3308 break;
3309 }
3310 if (MFI.isFixedObjectIndex(FI)) {
3311 SDValue Addr =
3312 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3313 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3314 return DAG.getLoad(
3315 ValVT, dl, Chain, Addr,
3316 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3317 Ins[i].PartOffset));
3318 }
3319 }
3320 }
3321
3322 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3323 VA.getLocMemOffset(), isImmutable);
3324
3325 // Set SExt or ZExt flag.
3326 if (VA.getLocInfo() == CCValAssign::ZExt) {
3327 MFI.setObjectZExt(FI, true);
3328 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3329 MFI.setObjectSExt(FI, true);
3330 }
3331
3332 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3333 SDValue Val = DAG.getLoad(
3334 ValVT, dl, Chain, FIN,
3335 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3336 return ExtendedInMem
3337 ? (VA.getValVT().isVector()
3338 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3339 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3340 : Val;
3341}
3342
3343// FIXME: Get this from tablegen.
3344static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3345 const X86Subtarget &Subtarget) {
3346 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3346, __PRETTY_FUNCTION__))
;
3347
3348 if (Subtarget.isCallingConvWin64(CallConv)) {
3349 static const MCPhysReg GPR64ArgRegsWin64[] = {
3350 X86::RCX, X86::RDX, X86::R8, X86::R9
3351 };
3352 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3353 }
3354
3355 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3356 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3357 };
3358 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3359}
3360
3361// FIXME: Get this from tablegen.
3362static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3363 CallingConv::ID CallConv,
3364 const X86Subtarget &Subtarget) {
3365 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3365, __PRETTY_FUNCTION__))
;
3366 if (Subtarget.isCallingConvWin64(CallConv)) {
3367 // The XMM registers which might contain var arg parameters are shadowed
3368 // in their paired GPR. So we only need to save the GPR to their home
3369 // slots.
3370 // TODO: __vectorcall will change this.
3371 return None;
3372 }
3373
3374 const Function &F = MF.getFunction();
3375 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3376 bool isSoftFloat = Subtarget.useSoftFloat();
3377 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3378, __PRETTY_FUNCTION__))
3378 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3378, __PRETTY_FUNCTION__))
;
3379 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3380 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3381 // registers.
3382 return None;
3383
3384 static const MCPhysReg XMMArgRegs64Bit[] = {
3385 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3386 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3387 };
3388 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3389}
3390
3391#ifndef NDEBUG
3392static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3393 return llvm::is_sorted(
3394 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3395 return A.getValNo() < B.getValNo();
3396 });
3397}
3398#endif
3399
3400namespace {
3401/// This is a helper class for lowering variable arguments parameters.
3402class VarArgsLoweringHelper {
3403public:
3404 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3405 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3406 CallingConv::ID CallConv, CCState &CCInfo)
3407 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3408 TheMachineFunction(DAG.getMachineFunction()),
3409 TheFunction(TheMachineFunction.getFunction()),
3410 FrameInfo(TheMachineFunction.getFrameInfo()),
3411 FrameLowering(*Subtarget.getFrameLowering()),
3412 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3413 CCInfo(CCInfo) {}
3414
3415 // Lower variable arguments parameters.
3416 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3417
3418private:
3419 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3420
3421 void forwardMustTailParameters(SDValue &Chain);
3422
3423 bool is64Bit() const { return Subtarget.is64Bit(); }
3424 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3425
3426 X86MachineFunctionInfo *FuncInfo;
3427 const SDLoc &DL;
3428 SelectionDAG &DAG;
3429 const X86Subtarget &Subtarget;
3430 MachineFunction &TheMachineFunction;
3431 const Function &TheFunction;
3432 MachineFrameInfo &FrameInfo;
3433 const TargetFrameLowering &FrameLowering;
3434 const TargetLowering &TargLowering;
3435 CallingConv::ID CallConv;
3436 CCState &CCInfo;
3437};
3438} // namespace
3439
3440void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3441 SDValue &Chain, unsigned StackSize) {
3442 // If the function takes variable number of arguments, make a frame index for
3443 // the start of the first vararg value... for expansion of llvm.va_start. We
3444 // can skip this if there are no va_start calls.
3445 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3446 CallConv != CallingConv::X86_ThisCall)) {
3447 FuncInfo->setVarArgsFrameIndex(
3448 FrameInfo.CreateFixedObject(1, StackSize, true));
3449 }
3450
3451 // Figure out if XMM registers are in use.
3452 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3454, __PRETTY_FUNCTION__))
3453 TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3454, __PRETTY_FUNCTION__))
3454 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3454, __PRETTY_FUNCTION__))
;
3455
3456 // 64-bit calling conventions support varargs and register parameters, so we
3457 // have to do extra work to spill them in the prologue.
3458 if (is64Bit()) {
3459 // Find the first unallocated argument registers.
3460 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3461 ArrayRef<MCPhysReg> ArgXMMs =
3462 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3463 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3464 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3465
3466 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3467, __PRETTY_FUNCTION__))
3467 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3467, __PRETTY_FUNCTION__))
;
3468
3469 if (isWin64()) {
3470 // Get to the caller-allocated home save location. Add 8 to account
3471 // for the return address.
3472 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3473 FuncInfo->setRegSaveFrameIndex(
3474 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3475 // Fixup to set vararg frame on shadow area (4 x i64).
3476 if (NumIntRegs < 4)
3477 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3478 } else {
3479 // For X86-64, if there are vararg parameters that are passed via
3480 // registers, then we must store them to their spots on the stack so
3481 // they may be loaded by dereferencing the result of va_next.
3482 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3483 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3484 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3485 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3486 }
3487
3488 SmallVector<SDValue, 6>
3489 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3490 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3491 // keeping live input value
3492 SDValue ALVal; // if applicable keeps SDValue for %al register
3493
3494 // Gather all the live in physical registers.
3495 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3496 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3497 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3498 }
3499 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3500 if (!AvailableXmms.empty()) {
3501 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3502 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3503 for (MCPhysReg Reg : AvailableXmms) {
3504 Register XMMReg = TheMachineFunction.addLiveIn(Reg, &X86::VR128RegClass);
3505 LiveXMMRegs.push_back(
3506 DAG.getCopyFromReg(Chain, DL, XMMReg, MVT::v4f32));
3507 }
3508 }
3509
3510 // Store the integer parameter registers.
3511 SmallVector<SDValue, 8> MemOps;
3512 SDValue RSFIN =
3513 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3514 TargLowering.getPointerTy(DAG.getDataLayout()));
3515 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3516 for (SDValue Val : LiveGPRs) {
3517 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3518 TargLowering.getPointerTy(DAG.getDataLayout()),
3519 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3520 SDValue Store =
3521 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3522 MachinePointerInfo::getFixedStack(
3523 DAG.getMachineFunction(),
3524 FuncInfo->getRegSaveFrameIndex(), Offset));
3525 MemOps.push_back(Store);
3526 Offset += 8;
3527 }
3528
3529 // Now store the XMM (fp + vector) parameter registers.
3530 if (!LiveXMMRegs.empty()) {
3531 SmallVector<SDValue, 12> SaveXMMOps;
3532 SaveXMMOps.push_back(Chain);
3533 SaveXMMOps.push_back(ALVal);
3534 SaveXMMOps.push_back(
3535 DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
3536 SaveXMMOps.push_back(
3537 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
3538 llvm::append_range(SaveXMMOps, LiveXMMRegs);
3539 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
3540 MVT::Other, SaveXMMOps));
3541 }
3542
3543 if (!MemOps.empty())
3544 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3545 }
3546}
3547
3548void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3549 // Find the largest legal vector type.
3550 MVT VecVT = MVT::Other;
3551 // FIXME: Only some x86_32 calling conventions support AVX512.
3552 if (Subtarget.useAVX512Regs() &&
3553 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3554 CallConv == CallingConv::Intel_OCL_BI)))
3555 VecVT = MVT::v16f32;
3556 else if (Subtarget.hasAVX())
3557 VecVT = MVT::v8f32;
3558 else if (Subtarget.hasSSE2())
3559 VecVT = MVT::v4f32;
3560
3561 // We forward some GPRs and some vector types.
3562 SmallVector<MVT, 2> RegParmTypes;
3563 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3564 RegParmTypes.push_back(IntVT);
3565 if (VecVT != MVT::Other)
3566 RegParmTypes.push_back(VecVT);
3567
3568 // Compute the set of forwarded registers. The rest are scratch.
3569 SmallVectorImpl<ForwardedRegister> &Forwards =
3570 FuncInfo->getForwardedMustTailRegParms();
3571 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3572
3573 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3574 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3575 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3576 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3577 }
3578
3579 // Copy all forwards from physical to virtual registers.
3580 for (ForwardedRegister &FR : Forwards) {
3581 // FIXME: Can we use a less constrained schedule?
3582 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3583 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
3584 TargLowering.getRegClassFor(FR.VT));
3585 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
3586 }
3587}
3588
3589void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
3590 unsigned StackSize) {
3591 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
3592 // If necessary, it would be set into the correct value later.
3593 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3594 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3595
3596 if (FrameInfo.hasVAStart())
3597 createVarArgAreaAndStoreRegisters(Chain, StackSize);
3598
3599 if (FrameInfo.hasMustTailInVarArgFunc())
3600 forwardMustTailParameters(Chain);
3601}
3602
3603SDValue X86TargetLowering::LowerFormalArguments(
3604 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3605 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3606 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3607 MachineFunction &MF = DAG.getMachineFunction();
3608 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3609
3610 const Function &F = MF.getFunction();
3611 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3612 F.getName() == "main")
3613 FuncInfo->setForceFramePointer(true);
3614
3615 MachineFrameInfo &MFI = MF.getFrameInfo();
3616 bool Is64Bit = Subtarget.is64Bit();
3617 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3618
3619 assert(((!(IsVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3621, __PRETTY_FUNCTION__))
3620 !(IsVarArg && canGuaranteeTCO(CallConv)) &&((!(IsVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3621, __PRETTY_FUNCTION__))
3621 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(IsVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3621, __PRETTY_FUNCTION__))
;
3622
3623 // Assign locations to all of the incoming arguments.
3624 SmallVector<CCValAssign, 16> ArgLocs;
3625 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3626
3627 // Allocate shadow area for Win64.
3628 if (IsWin64)
3629 CCInfo.AllocateStack(32, Align(8));
3630
3631 CCInfo.AnalyzeArguments(Ins, CC_X86);
3632
3633 // In vectorcall calling convention a second pass is required for the HVA
3634 // types.
3635 if (CallingConv::X86_VectorCall == CallConv) {
3636 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3637 }
3638
3639 // The next loop assumes that the locations are in the same order of the
3640 // input arguments.
3641 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3642, __PRETTY_FUNCTION__))
3642 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3642, __PRETTY_FUNCTION__))
;
3643
3644 SDValue ArgValue;
3645 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3646 ++I, ++InsIndex) {
3647 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3647, __PRETTY_FUNCTION__))
;
3648 CCValAssign &VA = ArgLocs[I];
3649
3650 if (VA.isRegLoc()) {
3651 EVT RegVT = VA.getLocVT();
3652 if (VA.needsCustom()) {
3653 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3655, __PRETTY_FUNCTION__))
3654 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3655, __PRETTY_FUNCTION__))
3655 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3655, __PRETTY_FUNCTION__))
;
3656
3657 // v64i1 values, in regcall calling convention, that are
3658 // compiled to 32 bit arch, are split up into two registers.
3659 ArgValue =
3660 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3661 } else {
3662 const TargetRegisterClass *RC;
3663 if (RegVT == MVT::i8)
3664 RC = &X86::GR8RegClass;
3665 else if (RegVT == MVT::i16)
3666 RC = &X86::GR16RegClass;
3667 else if (RegVT == MVT::i32)
3668 RC = &X86::GR32RegClass;
3669 else if (Is64Bit && RegVT == MVT::i64)
3670 RC = &X86::GR64RegClass;
3671 else if (RegVT == MVT::f32)
3672 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3673 else if (RegVT == MVT::f64)
3674 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3675 else if (RegVT == MVT::f80)
3676 RC = &X86::RFP80RegClass;
3677 else if (RegVT == MVT::f128)
3678 RC = &X86::VR128RegClass;
3679 else if (RegVT.is512BitVector())
3680 RC = &X86::VR512RegClass;
3681 else if (RegVT.is256BitVector())
3682 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3683 else if (RegVT.is128BitVector())
3684 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3685 else if (RegVT == MVT::x86mmx)
3686 RC = &X86::VR64RegClass;
3687 else if (RegVT == MVT::v1i1)
3688 RC = &X86::VK1RegClass;
3689 else if (RegVT == MVT::v8i1)
3690 RC = &X86::VK8RegClass;
3691 else if (RegVT == MVT::v16i1)
3692 RC = &X86::VK16RegClass;
3693 else if (RegVT == MVT::v32i1)
3694 RC = &X86::VK32RegClass;
3695 else if (RegVT == MVT::v64i1)
3696 RC = &X86::VK64RegClass;
3697 else
3698 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3698)
;
3699
3700 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3701 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3702 }
3703
3704 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3705 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3706 // right size.
3707 if (VA.getLocInfo() == CCValAssign::SExt)
3708 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3709 DAG.getValueType(VA.getValVT()));
3710 else if (VA.getLocInfo() == CCValAssign::ZExt)
3711 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3712 DAG.getValueType(VA.getValVT()));
3713 else if (VA.getLocInfo() == CCValAssign::BCvt)
3714 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3715
3716 if (VA.isExtInLoc()) {
3717 // Handle MMX values passed in XMM regs.
3718 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3719 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3720 else if (VA.getValVT().isVector() &&
3721 VA.getValVT().getScalarType() == MVT::i1 &&
3722 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3723 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3724 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3725 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3726 } else
3727 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3728 }
3729 } else {
3730 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3730, __PRETTY_FUNCTION__))
;
3731 ArgValue =
3732 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3733 }
3734
3735 // If value is passed via pointer - do a load.
3736 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3737 ArgValue =
3738 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3739
3740 InVals.push_back(ArgValue);
3741 }
3742
3743 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3744 // Swift calling convention does not require we copy the sret argument
3745 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3746 if (CallConv == CallingConv::Swift)
3747 continue;
3748
3749 // All x86 ABIs require that for returning structs by value we copy the
3750 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3751 // the argument into a virtual register so that we can access it from the
3752 // return points.
3753 if (Ins[I].Flags.isSRet()) {
3754 Register Reg = FuncInfo->getSRetReturnReg();
3755 if (!Reg) {
3756 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3757 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3758 FuncInfo->setSRetReturnReg(Reg);
3759 }
3760 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3761 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3762 break;
3763 }
3764 }
3765
3766 unsigned StackSize = CCInfo.getNextStackOffset();
3767 // Align stack specially for tail calls.
3768 if (shouldGuaranteeTCO(CallConv,
3769 MF.getTarget().Options.GuaranteedTailCallOpt))
3770 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3771
3772 if (IsVarArg)
3773 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
3774 .lowerVarArgsParameters(Chain, StackSize);
3775
3776 // Some CCs need callee pop.
3777 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
3778 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3779 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3780 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3781 // X86 interrupts must pop the error code (and the alignment padding) if
3782 // present.
3783 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3784 } else {
3785 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3786 // If this is an sret function, the return should pop the hidden pointer.
3787 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3788 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3789 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3790 FuncInfo->setBytesToPopOnReturn(4);
3791 }
3792
3793 if (!Is64Bit) {
3794 // RegSaveFrameIndex is X86-64 only.
3795 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3796 }
3797
3798 FuncInfo->setArgumentStackSize(StackSize);
3799
3800 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3801 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3802 if (Personality == EHPersonality::CoreCLR) {
3803 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3803, __PRETTY_FUNCTION__))
;
3804 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3805 // that we'd prefer this slot be allocated towards the bottom of the frame
3806 // (i.e. near the stack pointer after allocating the frame). Every
3807 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3808 // offset from the bottom of this and each funclet's frame must be the
3809 // same, so the size of funclets' (mostly empty) frames is dictated by
3810 // how far this slot is from the bottom (since they allocate just enough
3811 // space to accommodate holding this slot at the correct offset).
3812 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
3813 EHInfo->PSPSymFrameIdx = PSPSymFI;
3814 }
3815 }
3816
3817 if (CallConv == CallingConv::X86_RegCall ||
3818 F.hasFnAttribute("no_caller_saved_registers")) {
3819 MachineRegisterInfo &MRI = MF.getRegInfo();
3820 for (std::pair<Register, Register> Pair : MRI.liveins())
3821 MRI.disableCalleeSavedRegister(Pair.first);
3822 }
3823
3824 return Chain;
3825}
3826
3827SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3828 SDValue Arg, const SDLoc &dl,
3829 SelectionDAG &DAG,
3830 const CCValAssign &VA,
3831 ISD::ArgFlagsTy Flags,
3832 bool isByVal) const {
3833 unsigned LocMemOffset = VA.getLocMemOffset();
3834 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3835 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3836 StackPtr, PtrOff);
3837 if (isByVal)
3838 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3839
3840 return DAG.getStore(
3841 Chain, dl, Arg, PtrOff,
3842 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3843}
3844
3845/// Emit a load of return address if tail call
3846/// optimization is performed and it is required.
3847SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3848 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3849 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3850 // Adjust the Return address stack slot.
3851 EVT VT = getPointerTy(DAG.getDataLayout());
3852 OutRetAddr = getReturnAddressFrameIndex(DAG);
3853
3854 // Load the "old" Return address.
3855 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3856 return SDValue(OutRetAddr.getNode(), 1);
3857}
3858
3859/// Emit a store of the return address if tail call
3860/// optimization is performed and it is required (FPDiff!=0).
3861static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3862 SDValue Chain, SDValue RetAddrFrIdx,
3863 EVT PtrVT, unsigned SlotSize,
3864 int FPDiff, const SDLoc &dl) {
3865 // Store the return address to the appropriate stack slot.
3866 if (!FPDiff) return Chain;
3867 // Calculate the new stack slot for the return address.
3868 int NewReturnAddrFI =
3869 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3870 false);
3871 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3872 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3873 MachinePointerInfo::getFixedStack(
3874 DAG.getMachineFunction(), NewReturnAddrFI));
3875 return Chain;
3876}
3877
3878/// Returns a vector_shuffle mask for an movs{s|d}, movd
3879/// operation of specified width.
3880static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3881 SDValue V2) {
3882 unsigned NumElems = VT.getVectorNumElements();
3883 SmallVector<int, 8> Mask;
3884 Mask.push_back(NumElems);
3885 for (unsigned i = 1; i != NumElems; ++i)
3886 Mask.push_back(i);
3887 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3888}
3889
3890SDValue
3891X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3892 SmallVectorImpl<SDValue> &InVals) const {
3893 SelectionDAG &DAG = CLI.DAG;
3894 SDLoc &dl = CLI.DL;
3895 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3896 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3897 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3898 SDValue Chain = CLI.Chain;
3899 SDValue Callee = CLI.Callee;
3900 CallingConv::ID CallConv = CLI.CallConv;
3901 bool &isTailCall = CLI.IsTailCall;
3902 bool isVarArg = CLI.IsVarArg;
3903
3904 MachineFunction &MF = DAG.getMachineFunction();
3905 bool Is64Bit = Subtarget.is64Bit();
3906 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3907 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3908 bool IsSibcall = false;
3909 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
3910 CallConv == CallingConv::Tail;
3911 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3912 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CB);
3913 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3914 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3915 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3916 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CB);
3917 bool HasNoCfCheck =
3918 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3919 bool IsIndirectCall = (CI && CI->isIndirectCall());
3920 const Module *M = MF.getMMI().getModule();
3921 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3922
3923 MachineFunction::CallSiteInfo CSInfo;
3924 if (CallConv == CallingConv::X86_INTR)
3925 report_fatal_error("X86 interrupts may not be called directly");
3926
3927 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
3928 // If we are using a GOT, disable tail calls to external symbols with
3929 // default visibility. Tail calling such a symbol requires using a GOT
3930 // relocation, which forces early binding of the symbol. This breaks code
3931 // that require lazy function symbol resolution. Using musttail or
3932 // GuaranteedTailCallOpt will override this.
3933 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3934 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3935 G->getGlobal()->hasDefaultVisibility()))
3936 isTailCall = false;
3937 }
3938
3939 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
3940 if (IsMustTail) {
3941 // Force this to be a tail call. The verifier rules are enough to ensure
3942 // that we can lower this successfully without moving the return address
3943 // around.
3944 isTailCall = true;
3945 } else if (isTailCall) {
3946 // Check if it's really possible to do a tail call.
3947 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3948 isVarArg, SR != NotStructReturn,
3949 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3950 Outs, OutVals, Ins, DAG);
3951
3952 // Sibcalls are automatically detected tailcalls which do not require
3953 // ABI changes.
3954 if (!IsGuaranteeTCO && isTailCall)
3955 IsSibcall = true;
3956
3957 if (isTailCall)
3958 ++NumTailCalls;
3959 }
3960
3961 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3962, __PRETTY_FUNCTION__))
3962 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3962, __PRETTY_FUNCTION__))
;
3963
3964 // Analyze operands of the call, assigning locations to each operand.
3965 SmallVector<CCValAssign, 16> ArgLocs;
3966 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3967
3968 // Allocate shadow area for Win64.
3969 if (IsWin64)
3970 CCInfo.AllocateStack(32, Align(8));
3971
3972 CCInfo.AnalyzeArguments(Outs, CC_X86);
3973
3974 // In vectorcall calling convention a second pass is required for the HVA
3975 // types.
3976 if (CallingConv::X86_VectorCall == CallConv) {
3977 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3978 }
3979
3980 // Get a count of how many bytes are to be pushed on the stack.
3981 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3982 if (IsSibcall)
3983 // This is a sibcall. The memory operands are available in caller's
3984 // own caller's stack.
3985 NumBytes = 0;
3986 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
3987 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3988
3989 int FPDiff = 0;
3990 if (isTailCall && !IsSibcall && !IsMustTail) {
3991 // Lower arguments at fp - stackoffset + fpdiff.
3992 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3993
3994 FPDiff = NumBytesCallerPushed - NumBytes;
3995
3996 // Set the delta of movement of the returnaddr stackslot.
3997 // But only set if delta is greater than previous delta.
3998 if (FPDiff < X86Info->getTCReturnAddrDelta())
3999 X86Info->setTCReturnAddrDelta(FPDiff);
4000 }
4001
4002 unsigned NumBytesToPush = NumBytes;
4003 unsigned NumBytesToPop = NumBytes;
4004
4005 // If we have an inalloca argument, all stack space has already been allocated
4006 // for us and be right at the top of the stack. We don't support multiple
4007 // arguments passed in memory when using inalloca.
4008 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4009 NumBytesToPush = 0;
4010 if (!ArgLocs.back().isMemLoc())
4011 report_fatal_error("cannot use inalloca attribute on a register "
4012 "parameter");
4013 if (ArgLocs.back().getLocMemOffset() != 0)
4014 report_fatal_error("any parameter with the inalloca attribute must be "
4015 "the only memory argument");
4016 } else if (CLI.IsPreallocated) {
4017 assert(ArgLocs.back().isMemLoc() &&((ArgLocs.back().isMemLoc() && "cannot use preallocated attribute on a register "
"parameter") ? static_cast<void> (0) : __assert_fail (
"ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4019, __PRETTY_FUNCTION__))
4018 "cannot use preallocated attribute on a register "((ArgLocs.back().isMemLoc() && "cannot use preallocated attribute on a register "
"parameter") ? static_cast<void> (0) : __assert_fail (
"ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4019, __PRETTY_FUNCTION__))
4019 "parameter")((ArgLocs.back().isMemLoc() && "cannot use preallocated attribute on a register "
"parameter") ? static_cast<void> (0) : __assert_fail (
"ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4019, __PRETTY_FUNCTION__))
;
4020 SmallVector<size_t, 4> PreallocatedOffsets;
4021 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4022 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4023 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4024 }
4025 }
4026 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4027 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4028 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4029 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4030 NumBytesToPush = 0;
4031 }
4032
4033 if (!IsSibcall && !IsMustTail)
4034 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4035 NumBytes - NumBytesToPush, dl);
4036
4037 SDValue RetAddrFrIdx;
4038 // Load return address for tail calls.
4039 if (isTailCall && FPDiff)
4040 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4041 Is64Bit, FPDiff, dl);
4042
4043 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4044 SmallVector<SDValue, 8> MemOpChains;
4045 SDValue StackPtr;
4046
4047 // The next loop assumes that the locations are in the same order of the
4048 // input arguments.
4049 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4050, __PRETTY_FUNCTION__))
4050 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4050, __PRETTY_FUNCTION__))
;
4051
4052 // Walk the register/memloc assignments, inserting copies/loads. In the case
4053 // of tail call optimization arguments are handled later.
4054 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4055 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4056 ++I, ++OutIndex) {
4057 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4057, __PRETTY_FUNCTION__))
;
4058 // Skip inalloca/preallocated arguments, they have already been written.
4059 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4060 if (Flags.isInAlloca() || Flags.isPreallocated())
4061 continue;
4062
4063 CCValAssign &VA = ArgLocs[I];
4064 EVT RegVT = VA.getLocVT();
4065 SDValue Arg = OutVals[OutIndex];
4066 bool isByVal = Flags.isByVal();
4067
4068 // Promote the value if needed.
4069 switch (VA.getLocInfo()) {
4070 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4070)
;
4071 case CCValAssign::Full: break;
4072 case CCValAssign::SExt:
4073 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4074 break;
4075 case CCValAssign::ZExt:
4076 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4077 break;
4078 case CCValAssign::AExt:
4079 if (Arg.getValueType().isVector() &&
4080 Arg.getValueType().getVectorElementType() == MVT::i1)
4081 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4082 else if (RegVT.is128BitVector()) {
4083 // Special case: passing MMX values in XMM registers.
4084 Arg = DAG.getBitcast(MVT::i64, Arg);
4085 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4086 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4087 } else
4088 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4089 break;
4090 case CCValAssign::BCvt:
4091 Arg = DAG.getBitcast(RegVT, Arg);
4092 break;
4093 case CCValAssign::Indirect: {
4094 if (isByVal) {
4095 // Memcpy the argument to a temporary stack slot to prevent
4096 // the caller from seeing any modifications the callee may make
4097 // as guaranteed by the `byval` attribute.
4098 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4099 Flags.getByValSize(),
4100 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4101 SDValue StackSlot =
4102 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4103 Chain =
4104 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4105 // From now on treat this as a regular pointer
4106 Arg = StackSlot;
4107 isByVal = false;
4108 } else {
4109 // Store the argument.
4110 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4111 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4112 Chain = DAG.getStore(
4113 Chain, dl, Arg, SpillSlot,
4114 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4115 Arg = SpillSlot;
4116 }
4117 break;
4118 }
4119 }
4120
4121 if (VA.needsCustom()) {
4122 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4123, __PRETTY_FUNCTION__))
4123 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4123, __PRETTY_FUNCTION__))
;
4124 // Split v64i1 value into two registers
4125 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4126 } else if (VA.isRegLoc()) {
4127 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4128 const TargetOptions &Options = DAG.getTarget().Options;
4129 if (Options.EmitCallSiteInfo)
4130 CSInfo.emplace_back(VA.getLocReg(), I);
4131 if (isVarArg && IsWin64) {
4132 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4133 // shadow reg if callee is a varargs function.
4134 Register ShadowReg;
4135 switch (VA.getLocReg()) {
4136 case X86::XMM0: ShadowReg = X86::RCX; break;
4137 case X86::XMM1: ShadowReg = X86::RDX; break;
4138 case X86::XMM2: ShadowReg = X86::R8; break;
4139 case X86::XMM3: ShadowReg = X86::R9; break;
4140 }
4141 if (ShadowReg)
4142 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4143 }
4144 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4145 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4145, __PRETTY_FUNCTION__))
;
4146 if (!StackPtr.getNode())
4147 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4148 getPointerTy(DAG.getDataLayout()));
4149 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4150 dl, DAG, VA, Flags, isByVal));
4151 }
4152 }
4153
4154 if (!MemOpChains.empty())
4155 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4156
4157 if (Subtarget.isPICStyleGOT()) {
4158 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4159 // GOT pointer (except regcall).
4160 if (!isTailCall) {
4161 // Indirect call with RegCall calling convention may use up all the
4162 // general registers, so it is not suitable to bind EBX register for
4163 // GOT address, just let register allocator handle it.
4164 if (CallConv != CallingConv::X86_RegCall)
4165 RegsToPass.push_back(std::make_pair(
4166 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4167 getPointerTy(DAG.getDataLayout()))));
4168 } else {
4169 // If we are tail calling and generating PIC/GOT style code load the
4170 // address of the callee into ECX. The value in ecx is used as target of
4171 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4172 // for tail calls on PIC/GOT architectures. Normally we would just put the
4173 // address of GOT into ebx and then call target@PLT. But for tail calls
4174 // ebx would be restored (since ebx is callee saved) before jumping to the
4175 // target@PLT.
4176
4177 // Note: The actual moving to ECX is done further down.
4178 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4179 if (G && !G->getGlobal()->hasLocalLinkage() &&
4180 G->getGlobal()->hasDefaultVisibility())
4181 Callee = LowerGlobalAddress(Callee, DAG);
4182 else if (isa<ExternalSymbolSDNode>(Callee))
4183 Callee = LowerExternalSymbol(Callee, DAG);
4184 }
4185 }
4186
4187 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
4188 // From AMD64 ABI document:
4189 // For calls that may call functions that use varargs or stdargs
4190 // (prototype-less calls or calls to functions containing ellipsis (...) in
4191 // the declaration) %al is used as hidden argument to specify the number
4192 // of SSE registers used. The contents of %al do not need to match exactly
4193 // the number of registers, but must be an upper bound on the number of SSE
4194 // registers used and is in the range 0 - 8 inclusive.
4195
4196 // Count the number of XMM registers allocated.
4197 static const MCPhysReg XMMArgRegs[] = {
4198 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4199 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4200 };
4201 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4202 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4203, __PRETTY_FUNCTION__))
4203 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4203, __PRETTY_FUNCTION__))
;
4204 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4205 DAG.getConstant(NumXMMRegs, dl,
4206 MVT::i8)));
4207 }
4208
4209 if (isVarArg && IsMustTail) {
4210 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4211 for (const auto &F : Forwards) {
4212 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4213 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4214 }
4215 }
4216
4217 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4218 // don't need this because the eligibility check rejects calls that require
4219 // shuffling arguments passed in memory.
4220 if (!IsSibcall && isTailCall) {
4221 // Force all the incoming stack arguments to be loaded from the stack
4222 // before any new outgoing arguments are stored to the stack, because the
4223 // outgoing stack slots may alias the incoming argument stack slots, and
4224 // the alias isn't otherwise explicit. This is slightly more conservative
4225 // than necessary, because it means that each store effectively depends
4226 // on every argument instead of just those arguments it would clobber.
4227 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4228
4229 SmallVector<SDValue, 8> MemOpChains2;
4230 SDValue FIN;
4231 int FI = 0;
4232 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4233 ++I, ++OutsIndex) {
4234 CCValAssign &VA = ArgLocs[I];
4235
4236 if (VA.isRegLoc()) {
4237 if (VA.needsCustom()) {
4238 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4239, __PRETTY_FUNCTION__))
4239 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4239, __PRETTY_FUNCTION__))
;
4240 // This means that we are in special case where one argument was
4241 // passed through two register locations - Skip the next location
4242 ++I;
4243 }
4244
4245 continue;
4246 }
4247
4248 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4248, __PRETTY_FUNCTION__))
;
4249 SDValue Arg = OutVals[OutsIndex];
4250 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4251 // Skip inalloca/preallocated arguments. They don't require any work.
4252 if (Flags.isInAlloca() || Flags.isPreallocated())
4253 continue;
4254 // Create frame index.
4255 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4256 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4257 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4258 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4259
4260 if (Flags.isByVal()) {
4261 // Copy relative to framepointer.
4262 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4263 if (!StackPtr.getNode())
4264 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4265 getPointerTy(DAG.getDataLayout()));
4266 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4267 StackPtr, Source);
4268
4269 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4270 ArgChain,
4271 Flags, DAG, dl));
4272 } else {
4273 // Store relative to framepointer.
4274 MemOpChains2.push_back(DAG.getStore(
4275 ArgChain, dl, Arg, FIN,
4276 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4277 }
4278 }
4279
4280 if (!MemOpChains2.empty())
4281 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4282
4283 // Store the return address to the appropriate stack slot.
4284 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4285 getPointerTy(DAG.getDataLayout()),
4286 RegInfo->getSlotSize(), FPDiff, dl);
4287 }
4288
4289 // Build a sequence of copy-to-reg nodes chained together with token chain
4290 // and flag operands which copy the outgoing args into registers.
4291 SDValue InFlag;
4292 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4293 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4294 RegsToPass[i].second, InFlag);
4295 InFlag = Chain.getValue(1);
4296 }
4297
4298 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4299 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4299, __PRETTY_FUNCTION__))
;
4300 // In the 64-bit large code model, we have to make all calls
4301 // through a register, since the call instruction's 32-bit
4302 // pc-relative offset may not be large enough to hold the whole
4303 // address.
4304 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4305 Callee->getOpcode() == ISD::ExternalSymbol) {
4306 // Lower direct calls to global addresses and external symbols. Setting
4307 // ForCall to true here has the effect of removing WrapperRIP when possible
4308 // to allow direct calls to be selected without first materializing the
4309 // address into a register.
4310 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4311 } else if (Subtarget.isTarget64BitILP32() &&
4312 Callee->getValueType(0) == MVT::i32) {
4313 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4314 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4315 }
4316
4317 // Returns a chain & a flag for retval copy to use.
4318 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4319 SmallVector<SDValue, 8> Ops;
4320
4321 if (!IsSibcall && isTailCall && !IsMustTail) {
4322 Chain = DAG.getCALLSEQ_END(Chain,
4323 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4324 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4325 InFlag = Chain.getValue(1);
4326 }
4327
4328 Ops.push_back(Chain);
4329 Ops.push_back(Callee);
4330
4331 if (isTailCall)
4332 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4333
4334 // Add argument registers to the end of the list so that they are known live
4335 // into the call.
4336 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4337 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4338 RegsToPass[i].second.getValueType()));
4339
4340 // Add a register mask operand representing the call-preserved registers.
4341 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
4342 // set X86_INTR calling convention because it has the same CSR mask
4343 // (same preserved registers).
4344 const uint32_t *Mask = RegInfo->getCallPreservedMask(
4345 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
4346 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4346, __PRETTY_FUNCTION__))
;
4347
4348 // If this is an invoke in a 32-bit function using a funclet-based
4349 // personality, assume the function clobbers all registers. If an exception
4350 // is thrown, the runtime will not restore CSRs.
4351 // FIXME: Model this more precisely so that we can register allocate across
4352 // the normal edge and spill and fill across the exceptional edge.
4353 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4354 const Function &CallerFn = MF.getFunction();
4355 EHPersonality Pers =
4356 CallerFn.hasPersonalityFn()
4357 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4358 : EHPersonality::Unknown;
4359 if (isFuncletEHPersonality(Pers))
4360 Mask = RegInfo->getNoPreservedMask();
4361 }
4362
4363 // Define a new register mask from the existing mask.
4364 uint32_t *RegMask = nullptr;
4365
4366 // In some calling conventions we need to remove the used physical registers
4367 // from the reg mask.
4368 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4369 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4370
4371 // Allocate a new Reg Mask and copy Mask.
4372 RegMask = MF.allocateRegMask();
4373 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4374 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4375
4376 // Make sure all sub registers of the argument registers are reset
4377 // in the RegMask.
4378 for (auto const &RegPair : RegsToPass)
4379 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4380 SubRegs.isValid(); ++SubRegs)
4381 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4382
4383 // Create the RegMask Operand according to our updated mask.
4384 Ops.push_back(DAG.getRegisterMask(RegMask));
4385 } else {
4386 // Create the RegMask Operand according to the static mask.
4387 Ops.push_back(DAG.getRegisterMask(Mask));
4388 }
4389
4390 if (InFlag.getNode())
4391 Ops.push_back(InFlag);
4392
4393 if (isTailCall) {
4394 // We used to do:
4395 //// If this is the first return lowered for this function, add the regs
4396 //// to the liveout set for the function.
4397 // This isn't right, although it's probably harmless on x86; liveouts
4398 // should be computed from returns not tail calls. Consider a void
4399 // function making a tail call to a function returning int.
4400 MF.getFrameInfo().setHasTailCall();
4401 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4402 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4403 return Ret;
4404 }
4405
4406 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4407 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4408 } else {
4409 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4410 }
4411 InFlag = Chain.getValue(1);
4412 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4413 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4414
4415 // Save heapallocsite metadata.
4416 if (CLI.CB)
4417 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4418 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4419
4420 // Create the CALLSEQ_END node.
4421 unsigned NumBytesForCalleeToPop;
4422 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4423 DAG.getTarget().Options.GuaranteedTailCallOpt))
4424 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4425 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4426 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4427 SR == StackStructReturn)
4428 // If this is a call to a struct-return function, the callee
4429 // pops the hidden struct pointer, so we have to push it back.
4430 // This is common for Darwin/X86, Linux & Mingw32 targets.
4431 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4432 NumBytesForCalleeToPop = 4;
4433 else
4434 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4435
4436 // Returns a flag for retval copy to use.
4437 if (!IsSibcall) {
4438 Chain = DAG.getCALLSEQ_END(Chain,
4439 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4440 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4441 true),
4442 InFlag, dl);
4443 InFlag = Chain.getValue(1);
4444 }
4445
4446 // Handle result values, copying them out of physregs into vregs that we
4447 // return.
4448 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4449 InVals, RegMask);
4450}
4451
4452//===----------------------------------------------------------------------===//
4453// Fast Calling Convention (tail call) implementation
4454//===----------------------------------------------------------------------===//
4455
4456// Like the stdcall convention, the callee cleans up the arguments, except that ECX is
4457// reserved for storing the tail called function address. Only 2 registers are
4458// free for argument passing (inreg). Tail call optimization is performed
4459// provided:
4460// * tailcallopt is enabled
4461// * caller/callee are fastcc
4462// On X86_64 architecture with GOT-style position independent code only local
4463// (within module) calls are supported at the moment.
4464// To keep the stack aligned according to platform abi the function
4465// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4466// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
4467// If a tail called function callee has more arguments than the caller the
4468// caller needs to make sure that there is room to move the RETADDR to. This is
4469// achieved by reserving an area the size of the argument delta right after the
4470// original RETADDR, but before the saved framepointer or the spilled registers
4471// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4472// stack layout:
4473// arg1
4474// arg2
4475// RETADDR
4476// [ new RETADDR
4477// move area ]
4478// (possible EBP)
4479// ESI
4480// EDI
4481// local1 ..
4482
4483/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4484/// requirement.
4485unsigned
4486X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4487 SelectionDAG &DAG) const {
4488 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4489 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4490 assert(StackSize % SlotSize == 0 &&((StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize"
) ? static_cast<void> (0) : __assert_fail ("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4491, __PRETTY_FUNCTION__))
4491 "StackSize must be a multiple of SlotSize")((StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize"
) ? static_cast<void> (0) : __assert_fail ("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4491, __PRETTY_FUNCTION__))
;
4492 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4493}
4494
4495/// Return true if the given stack call argument is already available in the
4496/// same position (relatively) of the caller's incoming argument stack.
4497static
4498bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4499 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4500 const X86InstrInfo *TII, const CCValAssign &VA) {
4501 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4502
4503 for (;;) {
4504 // Look through nodes that don't alter the bits of the incoming value.
4505 unsigned Op = Arg.getOpcode();
4506 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4507 Arg = Arg.getOperand(0);
4508 continue;
4509 }
4510 if (Op == ISD::TRUNCATE) {
4511 const SDValue &TruncInput = Arg.getOperand(0);
4512 if (TruncInput.getOpcode() == ISD::AssertZext &&
4513 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4514 Arg.getValueType()) {
4515 Arg = TruncInput.getOperand(0);
4516 continue;
4517 }
4518 }
4519 break;
4520 }
4521
4522 int FI = INT_MAX2147483647;
4523 if (Arg.getOpcode() == ISD::CopyFromReg) {
4524 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4525 if (!VR.isVirtual())
4526 return false;
4527 MachineInstr *Def = MRI->getVRegDef(VR);
4528 if (!Def)
4529 return false;
4530 if (!Flags.isByVal()) {
4531 if (!TII->isLoadFromStackSlot(*Def, FI))
4532 return false;
4533 } else {
4534 unsigned Opcode = Def->getOpcode();
4535 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4536 Opcode == X86::LEA64_32r) &&
4537 Def->getOperand(1).isFI()) {
4538 FI = Def->getOperand(1).getIndex();
4539 Bytes = Flags.getByValSize();
4540 } else
4541 return false;
4542 }
4543 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4544 if (Flags.isByVal())
4545 // ByVal argument is passed in as a pointer but it's now being
4546 // dereferenced. e.g.
4547 // define @foo(%struct.X* %A) {
4548 // tail call @bar(%struct.X* byval %A)
4549 // }
4550 return false;
4551 SDValue Ptr = Ld->getBasePtr();
4552 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4553 if (!FINode)
4554 return false;
4555 FI = FINode->getIndex();
4556 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4557 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4558 FI = FINode->getIndex();
4559 Bytes = Flags.getByValSize();
4560 } else
4561 return false;
4562
4563 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4563, __PRETTY_FUNCTION__))
;
4564 if (!MFI.isFixedObjectIndex(FI))
4565 return false;
4566
4567 if (Offset != MFI.getObjectOffset(FI))
4568 return false;
4569
4570 // If this is not byval, check that the argument stack object is immutable.
4571 // inalloca and argument copy elision can create mutable argument stack
4572 // objects. Byval objects can be mutated, but a byval call intends to pass the
4573 // mutated memory.
4574 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4575 return false;
4576
4577 if (VA.getLocVT().getFixedSizeInBits() >
4578 Arg.getValueSizeInBits().getFixedSize()) {
4579 // If the argument location is wider than the argument type, check that any
4580 // extension flags match.
4581 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4582 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4583 return false;
4584 }
4585 }
4586
4587 return Bytes == MFI.getObjectSize(FI);
4588}
4589
4590/// Check whether the call is eligible for tail call optimization. Targets
4591/// that want to do tail call optimization should implement this function.
///
/// Returns false as soon as any of the checks below fails. Returns true either
/// through the guaranteed-TCO path (callee CC can guarantee TCO and matches the
/// caller CC) or after every sibcall check has passed.
4592bool X86TargetLowering::IsEligibleForTailCallOptimization(
4593 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4594 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4595 const SmallVectorImpl<ISD::OutputArg> &Outs,
4596 const SmallVectorImpl<SDValue> &OutVals,
4597 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4598 if (!mayTailCallThisCC(CalleeCC))
4599 return false;
4600
4601 // If -tailcallopt is specified, make fastcc functions tail-callable.
4602 MachineFunction &MF = DAG.getMachineFunction();
4603 const Function &CallerF = MF.getFunction();
4604
4605 // If the function return type is x86_fp80 and the callee return type is not,
4606 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4607 // perform a tailcall optimization here.
4608 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4609 return false;
4610
4611 CallingConv::ID CallerCC = CallerF.getCallingConv();
4612 bool CCMatch = CallerCC == CalleeCC;
4613 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4614 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4615 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4616 CalleeCC == CallingConv::Tail;
4617
4618 // Win64 functions have extra shadow space for argument homing. Don't do the
4619 // sibcall if the caller and callee have mismatched expectations for this
4620 // space.
4621 if (IsCalleeWin64 != IsCallerWin64)
4622 return false;
4623
 // With guaranteed TCO the answer depends only on whether the callee CC can
 // guarantee it and matches the caller CC; the sibcall checks below are not
 // consulted on this path.
4624 if (IsGuaranteeTCO) {
4625 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4626 return true;
4627 return false;
4628 }
4629
4630 // Look for obvious safe cases to perform tail call optimization that do not
4631 // require ABI changes. This is what gcc calls sibcall.
4632
4633 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4634 // emit a special epilogue.
4635 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4636 if (RegInfo->needsStackRealignment(MF))
4637 return false;
4638
4639 // Also avoid sibcall optimization if either caller or callee uses struct
4640 // return semantics.
4641 if (isCalleeStructRet || isCallerStructRet)
4642 return false;
4643
4644 // Do not sibcall optimize vararg calls unless all arguments are passed via
4645 // registers.
4646 LLVMContext &C = *DAG.getContext();
4647 if (isVarArg && !Outs.empty()) {
4648 // Optimizing for varargs on Win64 is unlikely to be safe without
4649 // additional testing.
4650 if (IsCalleeWin64 || IsCallerWin64)
4651 return false;
4652
4653 SmallVector<CCValAssign, 16> ArgLocs;
4654 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4655
4656 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4657 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4658 if (!ArgLocs[i].isRegLoc())
4659 return false;
4660 }
4661
4662 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4663 // stack. Therefore, if it's not used by the call it is not safe to optimize
4664 // this into a sibcall.
4665 bool Unused = false;
4666 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4667 if (!Ins[i].Used) {
4668 Unused = true;
4669 break;
4670 }
4671 }
 // Only when at least one result is unused do we need to look at where the
 // results are returned.
4672 if (Unused) {
4673 SmallVector<CCValAssign, 16> RVLocs;
4674 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4675 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4676 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4677 CCValAssign &VA = RVLocs[i];
4678 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4679 return false;
4680 }
4681 }
4682
4683 // Check that the call results are passed in the same way.
4684 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4685 RetCC_X86, RetCC_X86))
4686 return false;
4687 // The callee has to preserve all registers the caller needs to preserve.
4688 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4689 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4690 if (!CCMatch) {
4691 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4692 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4693 return false;
4694 }
4695
 // Stack bytes used by the outgoing arguments; stays 0 when there are no
 // arguments. Compared against the caller's bytes-to-pop at the end.
4696 unsigned StackArgsSize = 0;
4697
4698 // If the callee takes no arguments then go on to check the results of the
4699 // call.
4700 if (!Outs.empty()) {
4701 // Check if stack adjustment is needed. For now, do not do this if any
4702 // argument is passed on the stack.
 // NOTE(review): the loop below does accept stack-passed arguments when
 // MatchingStackOffset succeeds for them, so the sentence above looks stale.
4703 SmallVector<CCValAssign, 16> ArgLocs;
4704 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4705
4706 // Allocate shadow area for Win64
4707 if (IsCalleeWin64)
4708 CCInfo.AllocateStack(32, Align(8));
4709
4710 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4711 StackArgsSize = CCInfo.getNextStackOffset();
4712
4713 if (CCInfo.getNextStackOffset()) {
4714 // Check if the arguments are already laid out in the right way as
4715 // the caller's fixed stack objects.
4716 MachineFrameInfo &MFI = MF.getFrameInfo();
4717 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4718 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4719 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4720 CCValAssign &VA = ArgLocs[i];
4721 SDValue Arg = OutVals[i];
4722 ISD::ArgFlagsTy Flags = Outs[i].Flags;
 // Indirectly passed arguments always disqualify the call.
4723 if (VA.getLocInfo() == CCValAssign::Indirect)
4724 return false;
4725 if (!VA.isRegLoc()) {
4726 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4727 MFI, MRI, TII, VA))
4728 return false;
4729 }
4730 }
4731 }
4732
4733 bool PositionIndependent = isPositionIndependent();
4734 // If the tailcall address may be in a register, then make sure it's
4735 // possible to register allocate for it. In 32-bit, the call address can
4736 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4737 // callee-saved registers are restored. These happen to be the same
4738 // registers used to pass 'inreg' arguments so watch out for those.
4739 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4740 !isa<ExternalSymbolSDNode>(Callee)) ||
4741 PositionIndependent)) {
4742 unsigned NumInRegs = 0;
4743 // In PIC we need an extra register to formulate the address computation
4744 // for the callee.
4745 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4746
4747 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4748 CCValAssign &VA = ArgLocs[i];
4749 if (!VA.isRegLoc())
4750 continue;
4751 Register Reg = VA.getLocReg();
4752 switch (Reg) {
4753 default: break;
4754 case X86::EAX: case X86::EDX: case X86::ECX:
 // Give up once MaxInRegs of these registers carry arguments.
4755 if (++NumInRegs == MaxInRegs)
4756 return false;
4757 break;
4758 }
4759 }
4760 }
4761
4762 const MachineRegisterInfo &MRI = MF.getRegInfo();
4763 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4764 return false;
4765 }
4766
4767 bool CalleeWillPop =
4768 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4769 MF.getTarget().Options.GuaranteedTailCallOpt);
4770
4771 if (unsigned BytesToPop =
4772 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4773 // If we have bytes to pop, the callee must pop them.
4774 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4775 if (!CalleePopMatches)
4776 return false;
4777 } else if (CalleeWillPop && StackArgsSize > 0) {
4778 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4779 return false;
4780 }
4781
4782 return true;
4783}
4784
/// Forwards to X86::createFastISel with the same arguments and returns its
/// result.
4785FastISel *
4786X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4787 const TargetLibraryInfo *libInfo) const {
4788 return X86::createFastISel(funcInfo, libInfo);
4789}
4790
4791//===----------------------------------------------------------------------===//
4792// Other Lowering Hooks
4793//===----------------------------------------------------------------------===//
4794
/// Returns true if Op has exactly one use and its node is a normal load
/// (per ISD::isNormalLoad).
4795static bool MayFoldLoad(SDValue Op) {
4796 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4797}
4798
/// Returns true if Op has exactly one use and that single user is a normal
/// store (per ISD::isNormalStore).
4799static bool MayFoldIntoStore(SDValue Op) {
4800 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4801}
4802
/// Returns true if Op has exactly one use and that single user is an
/// ISD::ZERO_EXTEND node; false otherwise.
4803static bool MayFoldIntoZeroExtend(SDValue Op) {
4804 if (Op.hasOneUse()) {
4805 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4806 return (ISD::ZERO_EXTEND == Opcode);
4807 }
4808 return false;
4809}
4810
/// Returns true if Opcode is one of the X86ISD shuffle opcodes listed in the
/// switch below, and false for every other opcode.
4811static bool isTargetShuffle(unsigned Opcode) {
4812 switch(Opcode) {
4813 default: return false;
4814 case X86ISD::BLENDI:
4815 case X86ISD::PSHUFB:
4816 case X86ISD::PSHUFD:
4817 case X86ISD::PSHUFHW:
4818 case X86ISD::PSHUFLW:
4819 case X86ISD::SHUFP:
4820 case X86ISD::INSERTPS:
4821 case X86ISD::EXTRQI:
4822 case X86ISD::INSERTQI:
4823 case X86ISD::VALIGN:
4824 case X86ISD::PALIGNR:
4825 case X86ISD::VSHLDQ:
4826 case X86ISD::VSRLDQ:
4827 case X86ISD::MOVLHPS:
4828 case X86ISD::MOVHLPS:
4829 case X86ISD::MOVSHDUP:
4830 case X86ISD::MOVSLDUP:
4831 case X86ISD::MOVDDUP:
4832 case X86ISD::MOVSS:
4833 case X86ISD::MOVSD:
4834 case X86ISD::UNPCKL:
4835 case X86ISD::UNPCKH:
4836 case X86ISD::VBROADCAST:
4837 case X86ISD::VPERMILPI:
4838 case X86ISD::VPERMILPV:
4839 case X86ISD::VPERM2X128:
4840 case X86ISD::SHUF128:
4841 case X86ISD::VPERMIL2:
4842 case X86ISD::VPERMI:
4843 case X86ISD::VPPERM:
4844 case X86ISD::VPERMV:
4845 case X86ISD::VPERMV3:
4846 case X86ISD::VZEXT_MOVL:
4847 return true;
4848 }
4849}
4850
/// Returns true for the opcodes listed below: the variable-mask target
/// shuffles, plus OR / AND / ANDNP, which this function groups as 'faux'
/// target shuffles. Returns false for every other opcode.
4851static bool isTargetShuffleVariableMask(unsigned Opcode) {
4852 switch (Opcode) {
4853 default: return false;
4854 // Target Shuffles.
4855 case X86ISD::PSHUFB:
4856 case X86ISD::VPERMILPV:
4857 case X86ISD::VPERMIL2:
4858 case X86ISD::VPPERM:
4859 case X86ISD::VPERMV:
4860 case X86ISD::VPERMV3:
4861 return true;
4862 // 'Faux' Target Shuffles.
4863 case ISD::OR:
4864 case ISD::AND:
4865 case X86ISD::ANDNP:
4866 return true;
4867 }
4868}
4869
/// Returns true if Op is a VBROADCAST or VBROADCAST_LOAD node, looking through
/// any chain of EXTRACT_SUBVECTOR nodes by recursing on their first operand.
4870static bool isTargetShuffleSplat(SDValue Op) {
4871 unsigned Opcode = Op.getOpcode();
4872 if (Opcode == ISD::EXTRACT_SUBVECTOR)
4873 return isTargetShuffleSplat(Op.getOperand(0));
4874 return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
4875}
4876
/// Returns a frame-index node (of pointer type) for the return-address slot.
/// The fixed stack object is created on the first call and its index is stored
/// in X86MachineFunctionInfo so later calls reuse it.
4877SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4878 MachineFunction &MF = DAG.getMachineFunction();
4879 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4880 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4881 int ReturnAddrIndex = FuncInfo->getRAIndex();
4882
 // An index of 0 is treated here as "no return-address object created yet".
4883 if (ReturnAddrIndex == 0) {
4884 // Set up a frame object for the return address.
 // The object is SlotSize bytes wide and placed at offset -SlotSize.
4885 unsigned SlotSize = RegInfo->getSlotSize();
4886 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4887 -(int64_t)SlotSize,
4888 false);
4889 FuncInfo->setRAIndex(ReturnAddrIndex);
4890 }
4891
4892 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4893}
4894
/// Returns true if Offset may be used as a displacement under code model M.
/// The offset must fit in a signed 32-bit immediate. When a symbolic
/// displacement is also present, only the Small and Kernel code models are
/// accepted, each with its own range restriction on Offset.
4895bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4896 bool hasSymbolicDisplacement) {
4897 // Offset should fit into 32 bit immediate field.
4898 if (!isInt<32>(Offset))
4899 return false;
4900
4901 // If we don't have a symbolic displacement - we don't have any extra
4902 // restrictions.
4903 if (!hasSymbolicDisplacement)
4904 return true;
4905
4906 // FIXME: Some tweaks might be needed for medium code model.
4907 if (M != CodeModel::Small && M != CodeModel::Kernel)
4908 return false;
4909
4910 // For small code model we assume that the last object ends at least 16MB
4911 // before the 31-bit boundary. We may also accept pretty large negative
4912 // constants knowing that all objects are in the positive half of address space.
4913 if (M == CodeModel::Small && Offset < 16*1024*1024)
4914 return true;
4915
4916 // For kernel code model we know that all objects reside in the negative half
4917 // of 32bits address space. We may not accept negative offsets, since they may
4918 // be just off and we may accept pretty large positive ones.
4919 if (M == CodeModel::Kernel && Offset >= 0)
4920 return true;
4921
4922 return false;
4923}
4924
4925/// Determines whether the callee is required to pop its own arguments.
4926/// Callee pop is necessary to support tail calls.
///
/// Returns true for non-vararg calls when shouldGuaranteeTCO() holds for the
/// calling convention. Otherwise returns true only for the stdcall, fastcall,
/// thiscall and vectorcall conventions on non-64-bit targets.
4927bool X86::isCalleePop(CallingConv::ID CallingConv,
4928 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4929 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4930 // can guarantee TCO.
4931 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4932 return true;
4933
4934 switch (CallingConv) {
4935 default:
4936 return false;
4937 case CallingConv::X86_StdCall:
4938 case CallingConv::X86_FastCall:
4939 case CallingConv::X86_ThisCall:
4940 case CallingConv::X86_VectorCall:
4941 return !is64Bit;
4942 }
4943}
4944
4945/// Return true if the condition is a signed comparison operation.
/// COND_G/GE/L/LE yield true; COND_E/NE/B/A/BE/AE yield false. Any other
/// condition code reaches llvm_unreachable.
4946static bool isX86CCSigned(unsigned X86CC) {
4947 switch (X86CC) {
4948 default:
4949 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4949)
;
4950 case X86::COND_E:
4951 case X86::COND_NE:
4952 case X86::COND_B:
4953 case X86::COND_A:
4954 case X86::COND_BE:
4955 case X86::COND_AE:
4956 return false;
4957 case X86::COND_G:
4958 case X86::COND_GE:
4959 case X86::COND_L:
4960 case X86::COND_LE:
4961 return true;
4962 }
4963}
4964
/// Maps an integer ISD condition code to the corresponding X86 condition code.
/// Signed comparisons map to COND_G/GE/L/LE and unsigned ones to
/// COND_A/AE/B/BE; any condition code not listed reaches llvm_unreachable.
4965static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4966 switch (SetCCOpcode) {
4967 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4967)
;
4968 case ISD::SETEQ: return X86::COND_E;
4969 case ISD::SETGT: return X86::COND_G;
4970 case ISD::SETGE: return X86::COND_GE;
4971 case ISD::SETLT: return X86::COND_L;
4972 case ISD::SETLE: return X86::COND_LE;
4973 case ISD::SETNE: return X86::COND_NE;
4974 case ISD::SETULT: return X86::COND_B;
4975 case ISD::SETUGT: return X86::COND_A;
4976 case ISD::SETULE: return X86::COND_BE;
4977 case ISD::SETUGE: return X86::COND_AE;
4978 }
4979}
4980
4981/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4982/// condition code, returning the condition code and the LHS/RHS of the
4983/// comparison to make.
///
/// LHS and RHS are in/out: RHS may be replaced by a zero constant on the
/// integer path, and the two may be swapped on the floating-point path.
/// Returns X86::COND_INVALID for SETOEQ and SETUNE on the floating-point path.
4984static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4985 bool isFP, SDValue &LHS, SDValue &RHS,
4986 SelectionDAG &DAG) {
4987 if (!isFP) {
 // Comparisons against certain constants are rewritten into comparisons
 // against zero, some of which only need the sign flag.
4988 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4989 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4990 // X > -1 -> compare X with 0, jump on !sign.
4991 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4992 return X86::COND_NS;
4993 }
4994 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4995 // X < 0 -> compare X with 0, jump on sign.
4996 return X86::COND_S;
4997 }
4998 if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
4999 // X >= 0 -> compare X with 0, jump on !sign.
5000 return X86::COND_NS;
5001 }
5002 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5003 // X < 1 -> X <= 0
5004 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5005 return X86::COND_LE;
5006 }
5007 }
5008
5009 return TranslateIntegerX86CC(SetCCOpcode);
5010 }
5011
5012 // First determine if it is required or is profitable to flip the operands.
5013
5014 // If LHS is a foldable load, but RHS is not, flip the condition.
5015 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5016 !ISD::isNON_EXTLoad(RHS.getNode())) {
5017 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5018 std::swap(LHS, RHS);
5019 }
5020
 // For these four conditions only the operands are swapped; SetCCOpcode is
 // left alone and the table below maps them to the condition for the swapped
 // comparison (see the "flipped" cases).
5021 switch (SetCCOpcode) {
5022 default: break;
5023 case ISD::SETOLT:
5024 case ISD::SETOLE:
5025 case ISD::SETUGT:
5026 case ISD::SETUGE:
5027 std::swap(LHS, RHS);
5028 break;
5029 }
5030
5031 // On a floating point condition, the flags are set as follows:
5032 // ZF PF CF op
5033 // 0 | 0 | 0 | X > Y
5034 // 0 | 0 | 1 | X < Y
5035 // 1 | 0 | 0 | X == Y
5036 // 1 | 1 | 1 | unordered
5037 switch (SetCCOpcode) {
5038 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5038)
;
5039 case ISD::SETUEQ:
5040 case ISD::SETEQ: return X86::COND_E;
5041 case ISD::SETOLT: // flipped
5042 case ISD::SETOGT:
5043 case ISD::SETGT: return X86::COND_A;
5044 case ISD::SETOLE: // flipped
5045 case ISD::SETOGE:
5046 case ISD::SETGE: return X86::COND_AE;
5047 case ISD::SETUGT: // flipped
5048 case ISD::SETULT:
5049 case ISD::SETLT: return X86::COND_B;
5050 case ISD::SETUGE: // flipped
5051 case ISD::SETULE:
5052 case ISD::SETLE: return X86::COND_BE;
5053 case ISD::SETONE:
5054 case ISD::SETNE: return X86::COND_NE;
5055 case ISD::SETUO: return X86::COND_P;
5056 case ISD::SETO: return X86::COND_NP;
 // No single condition code is produced for these two; the caller receives
 // COND_INVALID.
5057 case ISD::SETOEQ:
5058 case ISD::SETUNE: return X86::COND_INVALID;
5059 }
5060}
5061
5062/// Is there a floating point cmov for the specific X86 condition code?
5063/// Current x86 isa includes the following FP cmov instructions:
5064/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
/// Returns true for COND_B, BE, E, P, A, AE, NE and NP; false otherwise.
5065static bool hasFPCMov(unsigned X86CC) {
5066 switch (X86CC) {
5067 default:
5068 return false;
5069 case X86::COND_B:
5070 case X86::COND_BE:
5071 case X86::COND_E:
5072 case X86::COND_P:
5073 case X86::COND_A:
5074 case X86::COND_AE:
5075 case X86::COND_NE:
5076 case X86::COND_NP:
5077 return true;
5078 }
5079}
5080
5081
/// Fills in Info (opcode, pointer value, memory VT, alignment, load/store
/// flags) for the X86 intrinsics handled below. Returns true when Info was
/// populated for the intrinsic and false otherwise. Info.flags and Info.offset
/// are reset unconditionally on entry.
5082bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5083 const CallInst &I,
5084 MachineFunction &MF,
5085 unsigned Intrinsic) const {
5086 Info.flags = MachineMemOperand::MONone;
5087 Info.offset = 0;
5088
5089 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
 // Intrinsics with no IntrinsicData entry: only the AES*KL intrinsics below
 // are handled; they are described as loads through a pointer argument.
 // NOTE(review): getIntegerVT's second argument is presumably a bit width;
 // confirm that 48 / 64 below are intended as bits rather than bytes.
5090 if (!IntrData) {
5091 switch (Intrinsic) {
5092 case Intrinsic::x86_aesenc128kl:
5093 case Intrinsic::x86_aesdec128kl:
5094 Info.opc = ISD::INTRINSIC_W_CHAIN;
5095 Info.ptrVal = I.getArgOperand(1);
5096 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5097 Info.align = Align(1);
5098 Info.flags |= MachineMemOperand::MOLoad;
5099 return true;
5100 case Intrinsic::x86_aesenc256kl:
5101 case Intrinsic::x86_aesdec256kl:
5102 Info.opc = ISD::INTRINSIC_W_CHAIN;
5103 Info.ptrVal = I.getArgOperand(1);
5104 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5105 Info.align = Align(1);
5106 Info.flags |= MachineMemOperand::MOLoad;
5107 return true;
 // The "wide" variants take the pointer as argument 0 instead of 1.
5108 case Intrinsic::x86_aesencwide128kl:
5109 case Intrinsic::x86_aesdecwide128kl:
5110 Info.opc = ISD::INTRINSIC_W_CHAIN;
5111 Info.ptrVal = I.getArgOperand(0);
5112 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5113 Info.align = Align(1);
5114 Info.flags |= MachineMemOperand::MOLoad;
5115 return true;
5116 case Intrinsic::x86_aesencwide256kl:
5117 case Intrinsic::x86_aesdecwide256kl:
5118 Info.opc = ISD::INTRINSIC_W_CHAIN;
5119 Info.ptrVal = I.getArgOperand(0);
5120 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5121 Info.align = Align(1);
5122 Info.flags |= MachineMemOperand::MOLoad;
5123 return true;
5124 }
5125 return false;
5126 }
5127
5128 switch (IntrData->Type) {
5129 case TRUNCATE_TO_MEM_VI8:
5130 case TRUNCATE_TO_MEM_VI16:
5131 case TRUNCATE_TO_MEM_VI32: {
 // Store of a truncated vector: the memory type keeps the element count of
 // the value operand (argument 1) but uses the narrower element type
 // selected by the intrinsic kind.
5132 Info.opc = ISD::INTRINSIC_VOID;
5133 Info.ptrVal = I.getArgOperand(0);
5134 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5135 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5136 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5137 ScalarVT = MVT::i8;
5138 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5139 ScalarVT = MVT::i16;
5140 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5141 ScalarVT = MVT::i32;
5142
5143 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5144 Info.align = Align(1);
5145 Info.flags |= MachineMemOperand::MOStore;
5146 break;
5147 }
5148 case GATHER:
5149 case GATHER_AVX2: {
 // No single pointer value is recorded. The memory type uses the data
 // element type and the smaller of the data and index element counts.
5150 Info.opc = ISD::INTRINSIC_W_CHAIN;
5151 Info.ptrVal = nullptr;
5152 MVT DataVT = MVT::getVT(I.getType());
5153 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5154 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5155 IndexVT.getVectorNumElements());
5156 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5157 Info.align = Align(1);
5158 Info.flags |= MachineMemOperand::MOLoad;
5159 break;
5160 }
5161 case SCATTER: {
 // Same shape as the gather case, but the data type comes from argument 3
 // and the access is recorded as a store.
5162 Info.opc = ISD::INTRINSIC_VOID;
5163 Info.ptrVal = nullptr;
5164 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5165 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5166 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5167 IndexVT.getVectorNumElements());
5168 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5169 Info.align = Align(1);
5170 Info.flags |= MachineMemOperand::MOStore;
5171 break;
5172 }
5173 default:
5174 return false;
5175 }
5176
5177 return true;
5178}
5179
5180/// Returns true if the target can instruction select the
5181/// specified FP immediate natively. If false, the legalizer will
5182/// materialize the FP immediate as a load from a constant pool.
///
/// The check is a bitwise comparison of Imm against each entry of
/// LegalFPImmediates; VT and ForCodeSize are not consulted here.
5183bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5184 bool ForCodeSize) const {
5185 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
5186 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
5187 return true;
5188 }
5189 return false;
5190}
5191
/// Returns true if narrowing the given load is considered worthwhile.
/// Load must be a simple LoadSDNode (asserted). ExtTy and NewVT are not
/// consulted by this implementation.
5192bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5193 ISD::LoadExtType ExtTy,
5194 EVT NewVT) const {
5195 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")((cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow"
) ? static_cast<void> (0) : __assert_fail ("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5195, __PRETTY_FUNCTION__))
;
5196
5197 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5198 // relocation target a movq or addq instruction: don't let the load shrink.
5199 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5200 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5201 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5202 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5203
5204 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5205 // those uses are extracted directly into a store, then the extract + store
5206 // can be store-folded. Therefore, it's probably not worth splitting the load.
5207 EVT VT = Load->getValueType(0);
5208 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5209 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5210 // Skip uses of the chain value. Result 0 of the node is the load value.
5211 if (UI.getUse().getResNo() != 0)
5212 continue;
5213
5214 // If this use is not an extract + store, it's probably worth splitting.
5215 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5216 UI->use_begin()->getOpcode() != ISD::STORE)
5217 return true;
5218 }
5219 // All non-chain uses are extract + store.
5220 return false;
5221 }
5222
 // Default: allow the load to be narrowed.
5223 return true;
5224}
5225
5226/// Returns true if it is beneficial to convert a load of a constant
5227/// to just the constant itself.
///
/// Ty must be an integer type (asserted). The answer is true exactly when its
/// primitive size is between 1 and 64 bits inclusive; Imm is not consulted.
5228bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5229 Type *Ty) const {
5230 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5230, __PRETTY_FUNCTION__))
;
5231
5232 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5233 if (BitSize == 0 || BitSize > 64)
5234 return false;
5235 return true;
5236}
5237
/// Returns false only when the compare operand type is floating point (other
/// than f128), the target is 64-bit LP64, and AVX is available; returns true
/// in every other case.
5238bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5239 // If we are using XMM registers in the ABI and the condition of the select is
5240 // a floating-point compare and we have blendv or conditional move, then it is
5241 // cheaper to select instead of doing a cross-register move and creating a
5242 // load that depends on the compare result.
 // NOTE(review): the feature actually tested below is hasAVX(); confirm that
 // this is the intended stand-in for "blendv or conditional move".
5243 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5244 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5245}
5246
/// Returns true unless VT is a vector type and the subtarget has AVX512.
5247bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5248 // TODO: It might be a win to ease or lift this restriction, but the generic
5249 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5250 if (VT.isVector() && Subtarget.hasAVX512())
5251 return false;
5252
5253 return true;
5254}
5255
/// Returns true if a multiply by the constant splat C should be decomposed
/// into shift plus add/sub. That requires C to be a constant splat vector, MUL
/// to be not legal for the type VT legalizes to, and the constant to sit one
/// away from a power of two (possibly negated), as tested at the end.
5256bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5257 SDValue C) const {
5258 // TODO: We handle scalars using custom code, but generic combining could make
5259 // that unnecessary.
5260 APInt MulC;
5261 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5262 return false;
5263
5264 // Find the type this will be legalized to. Otherwise we might prematurely
5265 // convert this to shl+add/sub and then still have to type legalize those ops.
5266 // Another choice would be to defer the decision for illegal types until
5267 // after type legalization. But constant splat vectors of i64 can't make it
5268 // through type legalization on 32-bit targets so we would need to special
5269 // case vXi64.
5270 while (getTypeAction(Context, VT) != TypeLegal)
5271 VT = getTypeToTransformTo(Context, VT);
5272
5273 // If vector multiply is legal, assume that's faster than shl + add/sub.
5274 // TODO: Multiply is a complex op with higher latency and lower throughput in
5275 // most implementations, so this check could be loosened based on type
5276 // and/or a CPU attribute.
5277 if (isOperationLegal(ISD::MUL, VT))
5278 return false;
5279
5280 // shl+add, shl+sub, shl+add+neg
5281 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5282 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5283}
5284
/// Returns true if extracting a ResVT subvector from SrcVT at element Index is
/// considered cheap. EXTRACT_SUBVECTOR must be legal or custom for ResVT. For
/// non-mask vectors, Index must be a multiple of ResVT's element count.
5285bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5286 unsigned Index) const {
5287 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5288 return false;
5289
5290 // Mask vectors support all subregister combinations and operations that
5291 // extract half of vector.
 // NOTE(review): the size test below requires ResVT to be twice the size of
 // SrcVT, which looks inverted for "extract half of vector" -- confirm whether
 // the operands of the comparison are the intended way round.
5292 if (ResVT.getVectorElementType() == MVT::i1)
5293 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5294 (Index == ResVT.getVectorNumElements()));
5295
5296 return (Index % ResVT.getVectorNumElements()) == 0;
5297}
5298
/// Returns true if the vector binop VecOp should be converted to a scalar op.
/// Target-specific opcodes are never scalarized. Otherwise the answer is true
/// when the vector op is unsupported, or when both the vector and the scalar
/// form of the op are legal, custom or promoted.
5299bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5300 unsigned Opc = VecOp.getOpcode();
5301
5302 // Assume target opcodes can't be scalarized.
5303 // TODO - do we have any exceptions?
5304 if (Opc >= ISD::BUILTIN_OP_END)
5305 return false;
5306
5307 // If the vector op is not supported, try to convert to scalar.
5308 EVT VecVT = VecOp.getValueType();
5309 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5310 return true;
5311
5312 // If the vector op is supported, but the scalar op is not, the transform may
5313 // not be worthwhile.
5314 EVT ScalarVT = VecVT.getScalarType();
5315 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5316}
5317
/// Returns false for vector types. For scalar types, returns true when VT is
/// a simple type or when Opcode is not marked Expand for VT. The unnamed bool
/// parameter is ignored.
5318bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5319 bool) const {
5320 // TODO: Allow vectors?
5321 if (VT.isVector())
5322 return false;
5323 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5324}
5325
/// Returns true exactly when the subtarget has BMI.
5326bool X86TargetLowering::isCheapToSpeculateCttz() const {
5327 // Speculate cttz only if we can directly use TZCNT.
5328 return Subtarget.hasBMI();
5329}
5330
/// Returns true exactly when the subtarget has LZCNT.
5331bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5332 // Speculate ctlz only if we can directly use LZCNT.
5333 return Subtarget.hasLZCNT();
5334}
5335
/// Decides whether a load of LoadVT followed by a bitcast to BitcastVT should
/// become a load of BitcastVT. Two i1-mask cases are rejected up front, pairs
/// of legal vector types are always accepted, and everything else is deferred
/// to the TargetLowering base implementation.
5336bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5337 const SelectionDAG &DAG,
5338 const MachineMemOperand &MMO) const {
 // Without AVX512, reject scalar loads bitcast to a vector of i1.
5339 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5340 BitcastVT.getVectorElementType() == MVT::i1)
5341 return false;
5342
 // Without DQI, reject an i8 load bitcast to v8i1.
5343 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5344 return false;
5345
5346 // If both types are legal vectors, it's always ok to convert them.
5347 if (LoadVT.isVector() && BitcastVT.isVector() &&
5348 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5349 return true;
5350
5351 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5352}
5353
/// Returns true if stores may be merged into a single store of type MemVT.
/// AddressSpace is not consulted by this implementation.
5354bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5355 const SelectionDAG &DAG) const {
5356 // If the function has the NoImplicitFloat attribute, only allow merged
5357 // stores up to the integer width checked below (64 or 32 bits).
5358 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
5359 Attribute::NoImplicitFloat);
5360
5361 if (NoFloat) {
5362 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5363 return (MemVT.getSizeInBits() <= MaxIntSize);
5364 }
5365 // Make sure we don't merge greater than our preferred vector
5366 // width.
5367 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5368 return false;
5369
5370 return true;
5371}
5372
/// Returns true exactly when the subtarget reports fast LZCNT.
5373bool X86TargetLowering::isCtlzFast() const {
5374 return Subtarget.hasFastLZCNT();
5375}
5376
/// Always returns true; the AndI instruction is not inspected.
5377bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5378 const Instruction &AndI) const {
5379 return true;
5380}
5381
/// Returns true if Y is a non-constant scalar of type i32 or i64 and the
/// subtarget has BMI; false otherwise.
5382bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5383 EVT VT = Y.getValueType();
5384
5385 if (VT.isVector())
5386 return false;
5387
5388 if (!Subtarget.hasBMI())
5389 return false;
5390
5391 // There are only 32-bit and 64-bit forms for 'andn'.
5392 if (VT != MVT::i32 && VT != MVT::i64)
5393 return false;
5394
 // A constant operand disqualifies the value.
5395 return !isa<ConstantSDNode>(Y);
5396}
5397
/// Scalar values defer to hasAndNotCompare(). Vector values require SSE1 and
/// at least 128 bits; v4i32 is then always accepted and every other vector
/// type additionally requires SSE2.
5398bool X86TargetLowering::hasAndNot(SDValue Y) const {
5399 EVT VT = Y.getValueType();
5400
5401 if (!VT.isVector())
5402 return hasAndNotCompare(Y);
5403
5404 // Vector.
5405
5406 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5407 return false;
5408
5409 if (VT == MVT::v4i32)
5410 return true;
5411
5412 return Subtarget.hasSSE2();
5413}
5414
/// Returns true when X has a scalar integer type; Y is not inspected.
5415bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5416 return X.getValueType().isScalarInteger(); // 'bt'
5417}
5418
/// X86 override: first defers to the TargetLowering base implementation and
/// returns false if it rejects the fold. Otherwise accepts the fold for scalar
/// integers, for splat shift amounts, or when AVX2 is available; for the
/// remaining vector cases it accepts only when the new shift opcode is SHL.
5419bool X86TargetLowering::
5420 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5421 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5422 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5423 SelectionDAG &DAG) const {
5424 // Does baseline recommend not to perform the fold by default?
5425 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5426 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5427 return false;
5428 // For scalars this transform is always beneficial.
5429 if (X.getValueType().isScalarInteger())
5430 return true;
5431 // If all the shift amounts are identical, then transform is beneficial even
5432 // with rudimentary SSE2 shifts.
5433 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5434 return true;
5435 // If we have AVX2 with its powerful shift operations, then it's also good.
5436 if (Subtarget.hasAVX2())
5437 return true;
5438 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5439 return NewShiftOpcode == ISD::SHL;
5440}
5441
5442bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5443 const SDNode *N, CombineLevel Level) const {
5444 assert(((N->getOpcode() == ISD::SHL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5448, __PRETTY_FUNCTION__))
5445 N->getOperand(0).getOpcode() == ISD::SRL) ||((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5448, __PRETTY_FUNCTION__))
5446 (N->getOpcode() == ISD::SRL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5448, __PRETTY_FUNCTION__))
5447 N->getOperand(0).getOpcode() == ISD::SHL)) &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5448, __PRETTY_FUNCTION__))
5448 "Expected shift-shift mask")((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5448, __PRETTY_FUNCTION__))
;
5449 EVT VT = N->getValueType(0);
5450 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5451 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5452 // Only fold if the shift values are equal - so it folds to AND.
5453 // TODO - we should fold if either is a non-uniform vector but we don't do
5454 // the fold for non-splats yet.
5455 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5456 }
5457 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5458}
5459
5460bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5461 EVT VT = Y.getValueType();
5462
5463 // For vectors, we don't have a preference, but we probably want a mask.
5464 if (VT.isVector())
5465 return false;
5466
5467 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5468 if (VT == MVT::i64 && !Subtarget.is64Bit())
5469 return false;
5470
5471 return true;
5472}
5473
5474bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5475 SDNode *N) const {
5476 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5477 !Subtarget.isOSWindows())
5478 return false;
5479 return true;
5480}
5481
5482bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5483 // Any legal vector type can be splatted more efficiently than
5484 // loading/spilling from memory.
5485 return isTypeLegal(VT);
5486}
5487
5488MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5489 MVT VT = MVT::getIntegerVT(NumBits);
5490 if (isTypeLegal(VT))
5491 return VT;
5492
5493 // PMOVMSKB can handle this.
5494 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5495 return MVT::v16i8;
5496
5497 // VPMOVMSKB can handle this.
5498 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5499 return MVT::v32i8;
5500
5501 // TODO: Allow 64-bit type for 32-bit target.
5502 // TODO: 512-bit types should be allowed, but make sure that those
5503 // cases are handled in combineVectorSizedSetCCEquality().
5504
5505 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5506}
5507
5508/// Val is the undef sentinel value or equal to the specified value.
5509static bool isUndefOrEqual(int Val, int CmpVal) {
5510 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5511}
5512
5513/// Return true if every element in Mask is the undef sentinel value or equal to
5514/// the specified value..
5515static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
5516 return llvm::all_of(Mask, [CmpVal](int M) {
5517 return (M == SM_SentinelUndef) || (M == CmpVal);
5518 });
5519}
5520
5521/// Val is either the undef or zero sentinel value.
5522static bool isUndefOrZero(int Val) {
5523 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5524}
5525
5526/// Return true if every element in Mask, beginning from position Pos and ending
5527/// in Pos+Size is the undef sentinel value.
5528static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5529 return llvm::all_of(Mask.slice(Pos, Size),
5530 [](int M) { return M == SM_SentinelUndef; });
5531}
5532
5533/// Return true if the mask creates a vector whose lower half is undefined.
5534static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5535 unsigned NumElts = Mask.size();
5536 return isUndefInRange(Mask, 0, NumElts / 2);
5537}
5538
5539/// Return true if the mask creates a vector whose upper half is undefined.
5540static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5541 unsigned NumElts = Mask.size();
5542 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5543}
5544
/// Return true if \p Val lies in the half-open range [Low, Hi), i.e.
/// Low <= Val < Hi.
static bool isInRange(int Val, int Low, int Hi) {
  if (Val < Low)
    return false;
  return Val < Hi;
}
5549
5550/// Return true if the value of any element in Mask falls within the specified
5551/// range (L, H].
5552static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5553 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5554}
5555
5556/// Return true if the value of any element in Mask is the zero sentinel value.
5557static bool isAnyZero(ArrayRef<int> Mask) {
5558 return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
5559}
5560
5561/// Return true if the value of any element in Mask is the zero or undef
5562/// sentinel values.
5563static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
5564 return llvm::any_of(Mask, [](int M) {
5565 return M == SM_SentinelZero || M == SM_SentinelUndef;
5566 });
5567}
5568
5569/// Return true if Val is undef or if its value falls within the
5570/// specified range (L, H].
5571static bool isUndefOrInRange(int Val, int Low, int Hi) {
5572 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5573}
5574
5575/// Return true if every element in Mask is undef or if its value
5576/// falls within the specified range (L, H].
5577static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5578 return llvm::all_of(
5579 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5580}
5581
5582/// Return true if Val is undef, zero or if its value falls within the
5583/// specified range (L, H].
5584static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5585 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5586}
5587
5588/// Return true if every element in Mask is undef, zero or if its value
5589/// falls within the specified range (L, H].
5590static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5591 return llvm::all_of(
5592 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5593}
5594
5595/// Return true if every element in Mask, beginning
5596/// from position Pos and ending in Pos + Size, falls within the specified
5597/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5598static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5599 unsigned Size, int Low, int Step = 1) {
5600 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5601 if (!isUndefOrEqual(Mask[i], Low))
5602 return false;
5603 return true;
5604}
5605
5606/// Return true if every element in Mask, beginning
5607/// from position Pos and ending in Pos+Size, falls within the specified
5608/// sequential range (Low, Low+Size], or is undef or is zero.
5609static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5610 unsigned Size, int Low,
5611 int Step = 1) {
5612 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5613 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5614 return false;
5615 return true;
5616}
5617
5618/// Return true if every element in Mask, beginning
5619/// from position Pos and ending in Pos+Size is undef or is zero.
5620static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5621 unsigned Size) {
5622 return llvm::all_of(Mask.slice(Pos, Size),
5623 [](int M) { return isUndefOrZero(M); });
5624}
5625
5626/// Helper function to test whether a shuffle mask could be
5627/// simplified by widening the elements being shuffled.
5628///
5629/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5630/// leaves it in an unspecified state.
5631///
5632/// NOTE: This must handle normal vector shuffle masks and *target* vector
5633/// shuffle masks. The latter have the special property of a '-2' representing
5634/// a zero-ed lane of a vector.
5635static bool canWidenShuffleElements(ArrayRef<int> Mask,
5636 SmallVectorImpl<int> &WidenedMask) {
5637 WidenedMask.assign(Mask.size() / 2, 0);
5638 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5639 int M0 = Mask[i];
5640 int M1 = Mask[i + 1];
5641
5642 // If both elements are undef, its trivial.
5643 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5644 WidenedMask[i / 2] = SM_SentinelUndef;
5645 continue;
5646 }
5647
5648 // Check for an undef mask and a mask value properly aligned to fit with
5649 // a pair of values. If we find such a case, use the non-undef mask's value.
5650 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5651 WidenedMask[i / 2] = M1 / 2;
5652 continue;
5653 }
5654 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5655 WidenedMask[i / 2] = M0 / 2;
5656 continue;
5657 }
5658
5659 // When zeroing, we need to spread the zeroing across both lanes to widen.
5660 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5661 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5662 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5663 WidenedMask[i / 2] = SM_SentinelZero;
5664 continue;
5665 }
5666 return false;
5667 }
5668
5669 // Finally check if the two mask values are adjacent and aligned with
5670 // a pair.
5671 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5672 WidenedMask[i / 2] = M0 / 2;
5673 continue;
5674 }
5675
5676 // Otherwise we can't safely widen the elements used in this shuffle.
5677 return false;
5678 }
5679 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5680, __PRETTY_FUNCTION__))
5680 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5680, __PRETTY_FUNCTION__))
;
5681
5682 return true;
5683}
5684
5685static bool canWidenShuffleElements(ArrayRef<int> Mask,
5686 const APInt &Zeroable,
5687 bool V2IsZero,
5688 SmallVectorImpl<int> &WidenedMask) {
5689 // Create an alternative mask with info about zeroable elements.
5690 // Here we do not set undef elements as zeroable.
5691 SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
5692 if (V2IsZero) {
5693 assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!")((!Zeroable.isNullValue() && "V2's non-undef elements are used?!"
) ? static_cast<void> (0) : __assert_fail ("!Zeroable.isNullValue() && \"V2's non-undef elements are used?!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5693, __PRETTY_FUNCTION__))
;
5694 for (int i = 0, Size = Mask.size(); i != Size; ++i)
5695 if (Mask[i] != SM_SentinelUndef && Zeroable[i])
5696 ZeroableMask[i] = SM_SentinelZero;
5697 }
5698 return canWidenShuffleElements(ZeroableMask, WidenedMask);
5699}
5700
5701static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5702 SmallVector<int, 32> WidenedMask;
5703 return canWidenShuffleElements(Mask, WidenedMask);
5704}
5705
5706// Attempt to narrow/widen shuffle mask until it matches the target number of
5707// elements.
5708static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
5709 SmallVectorImpl<int> &ScaledMask) {
5710 unsigned NumSrcElts = Mask.size();
5711 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&((((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts
) == 0) && "Illegal shuffle scale factor") ? static_cast
<void> (0) : __assert_fail ("((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) && \"Illegal shuffle scale factor\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5712, __PRETTY_FUNCTION__))
5712 "Illegal shuffle scale factor")((((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts
) == 0) && "Illegal shuffle scale factor") ? static_cast
<void> (0) : __assert_fail ("((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) && \"Illegal shuffle scale factor\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5712, __PRETTY_FUNCTION__))
;
5713
5714 // Narrowing is guaranteed to work.
5715 if (NumDstElts >= NumSrcElts) {
5716 int Scale = NumDstElts / NumSrcElts;
5717 llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask);
5718 return true;
5719 }
5720
5721 // We have to repeat the widening until we reach the target size, but we can
5722 // split out the first widening as it sets up ScaledMask for us.
5723 if (canWidenShuffleElements(Mask, ScaledMask)) {
5724 while (ScaledMask.size() > NumDstElts) {
5725 SmallVector<int, 16> WidenedMask;
5726 if (!canWidenShuffleElements(ScaledMask, WidenedMask))
5727 return false;
5728 ScaledMask = std::move(WidenedMask);
5729 }
5730 return true;
5731 }
5732
5733 return false;
5734}
5735
5736/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5737bool X86::isZeroNode(SDValue Elt) {
5738 return isNullConstant(Elt) || isNullFPConstant(Elt);
5739}
5740
5741// Build a vector of constants.
5742// Use an UNDEF node if MaskElt == -1.
5743// Split 64-bit constants in the 32-bit mode.
5744static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5745 const SDLoc &dl, bool IsMask = false) {
5746
5747 SmallVector<SDValue, 32> Ops;
5748 bool Split = false;
5749
5750 MVT ConstVecVT = VT;
5751 unsigned NumElts = VT.getVectorNumElements();
5752 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5753 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5754 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5755 Split = true;
5756 }
5757
5758 MVT EltVT = ConstVecVT.getVectorElementType();
5759 for (unsigned i = 0; i < NumElts; ++i) {
5760 bool IsUndef = Values[i] < 0 && IsMask;
5761 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5762 DAG.getConstant(Values[i], dl, EltVT);
5763 Ops.push_back(OpNode);
5764 if (Split)
5765 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5766 DAG.getConstant(0, dl, EltVT));
5767 }
5768 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5769 if (Split)
5770 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5771 return ConstsNode;
5772}
5773
5774static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5775 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5776 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5777, __PRETTY_FUNCTION__))
5777 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5777, __PRETTY_FUNCTION__))
;
5778 SmallVector<SDValue, 32> Ops;
5779 bool Split = false;
5780
5781 MVT ConstVecVT = VT;
5782 unsigned NumElts = VT.getVectorNumElements();
5783 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5784 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5785 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5786 Split = true;
5787 }
5788
5789 MVT EltVT = ConstVecVT.getVectorElementType();
5790 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5791 if (Undefs[i]) {
5792 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5793 continue;
5794 }
5795 const APInt &V = Bits[i];
5796 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5796, __PRETTY_FUNCTION__))
;
5797 if (Split) {
5798 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5799 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5800 } else if (EltVT == MVT::f32) {
5801 APFloat FV(APFloat::IEEEsingle(), V);
5802 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5803 } else if (EltVT == MVT::f64) {
5804 APFloat FV(APFloat::IEEEdouble(), V);
5805 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5806 } else {
5807 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5808 }
5809 }
5810
5811 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5812 return DAG.getBitcast(VT, ConstsNode);
5813}
5814
5815/// Returns a vector of specified type with all zero elements.
5816static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5817 SelectionDAG &DAG, const SDLoc &dl) {
5818 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5820, __PRETTY_FUNCTION__))
5819 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5820, __PRETTY_FUNCTION__))
5820 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5820, __PRETTY_FUNCTION__))
;
5821
5822 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5823 // type. This ensures they get CSE'd. But if the integer type is not
5824 // available, use a floating-point +0.0 instead.
5825 SDValue Vec;
5826 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5827 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5828 } else if (VT.isFloatingPoint()) {
5829 Vec = DAG.getConstantFP(+0.0, dl, VT);
5830 } else if (VT.getVectorElementType() == MVT::i1) {
5831 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5832, __PRETTY_FUNCTION__))
5832 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5832, __PRETTY_FUNCTION__))
;
5833 Vec = DAG.getConstant(0, dl, VT);
5834 } else {
5835 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5836 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5837 }
5838 return DAG.getBitcast(VT, Vec);
5839}
5840
5841static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5842 const SDLoc &dl, unsigned vectorWidth) {
5843 EVT VT = Vec.getValueType();
5844 EVT ElVT = VT.getVectorElementType();
5845 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5846 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5847 VT.getVectorNumElements()/Factor);
5848
5849 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5850 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5851 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5851, __PRETTY_FUNCTION__))
;
5852
5853 // This is the index of the first element of the vectorWidth-bit chunk
5854 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5855 IdxVal &= ~(ElemsPerChunk - 1);
5856
5857 // If the input is a buildvector just emit a smaller one.
5858 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5859 return DAG.getBuildVector(ResultVT, dl,
5860 Vec->ops().slice(IdxVal, ElemsPerChunk));
5861
5862 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5863 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5864}
5865
5866/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5867/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5868/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5869/// instructions or a simple subregister reference. Idx is an index in the
5870/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5871/// lowering EXTRACT_VECTOR_ELT operations easier.
5872static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5873 SelectionDAG &DAG, const SDLoc &dl) {
5874 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5875, __PRETTY_FUNCTION__))
5875 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5875, __PRETTY_FUNCTION__))
;
5876 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5877}
5878
5879/// Generate a DAG to grab 256-bits from a 512-bit vector.
5880static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5881 SelectionDAG &DAG, const SDLoc &dl) {
5882 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5882, __PRETTY_FUNCTION__))
;
5883 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5884}
5885
5886static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5887 SelectionDAG &DAG, const SDLoc &dl,
5888 unsigned vectorWidth) {
5889 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5890, __PRETTY_FUNCTION__))
5890 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5890, __PRETTY_FUNCTION__))
;
5891 // Inserting UNDEF is Result
5892 if (Vec.isUndef())
5893 return Result;
5894 EVT VT = Vec.getValueType();
5895 EVT ElVT = VT.getVectorElementType();
5896 EVT ResultVT = Result.getValueType();
5897
5898 // Insert the relevant vectorWidth bits.
5899 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5900 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5900, __PRETTY_FUNCTION__))
;
5901
5902 // This is the index of the first element of the vectorWidth-bit chunk
5903 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5904 IdxVal &= ~(ElemsPerChunk - 1);
5905
5906 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5907 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5908}
5909
5910/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5911/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5912/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5913/// simple superregister reference. Idx is an index in the 128 bits
5914/// we want. It need not be aligned to a 128-bit boundary. That makes
5915/// lowering INSERT_VECTOR_ELT operations easier.
5916static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5917 SelectionDAG &DAG, const SDLoc &dl) {
5918 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5918, __PRETTY_FUNCTION__))
;
5919 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);