Bug Summary

File: llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 36216, column 5
Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem 
/usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-12-11-210320-5824-1 -x c++ /build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp

/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/IntrinsicLowering.h"
34#include "llvm/CodeGen/MachineFrameInfo.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineJumpTableInfo.h"
38#include "llvm/CodeGen/MachineModuleInfo.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetLowering.h"
41#include "llvm/CodeGen/WinEHFuncInfo.h"
42#include "llvm/IR/CallingConv.h"
43#include "llvm/IR/Constants.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/DiagnosticInfo.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/GlobalAlias.h"
48#include "llvm/IR/GlobalVariable.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Intrinsics.h"
51#include "llvm/MC/MCAsmInfo.h"
52#include "llvm/MC/MCContext.h"
53#include "llvm/MC/MCExpr.h"
54#include "llvm/MC/MCSymbol.h"
55#include "llvm/Support/CommandLine.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/ErrorHandling.h"
58#include "llvm/Support/KnownBits.h"
59#include "llvm/Support/MathExtras.h"
60#include "llvm/Target/TargetOptions.h"
61#include <algorithm>
62#include <bitset>
63#include <cctype>
64#include <numeric>
65using namespace llvm;
66
67#define DEBUG_TYPE "x86-isel"
68
69STATISTIC(NumTailCalls, "Number of tail calls");
// (analyzer macro expansion: static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls", "Number of tail calls"};)
70
/// Hidden command-line option "x86-experimental-pref-loop-alignment"
/// (initial value 4). Per its description it sets the preferred loop alignment
/// for experiments, expressed as log2 of the byte count: that many low bits of
/// the loop header PC will be zero.
71static cl::opt<int> ExperimentalPrefLoopAlignment(
72 "x86-experimental-pref-loop-alignment", cl::init(4),
73 cl::desc(
74 "Sets the preferable loop alignment for experiments (as log2 bytes)"
75 "(the last x86-experimental-pref-loop-alignment bits"
76 " of the loop header PC will be 0)."),
77 cl::Hidden);
78
/// Hidden command-line option "mul-constant-optimization" (initially true).
/// Per its description it controls replacing 'mul x, Const' with more
/// effective instructions such as SHIFT and LEA.
79static cl::opt<bool> MulConstantOptimization(
80 "mul-constant-optimization", cl::init(true),
81 cl::desc("Replace 'mul x, Const' with more effective instructions like "
82 "SHIFT, LEA, etc."),
83 cl::Hidden);
84
/// Hidden command-line option "x86-experimental-unordered-atomic-isel"
/// (initially false). Per its description it selects LoadSDNode/StoreSDNode
/// instead of AtomicSDNode for unordered atomic loads and stores.
85static cl::opt<bool> ExperimentalUnorderedISEL(
86 "x86-experimental-unordered-atomic-isel", cl::init(false),
87 cl::desc("Use LoadSDNode and StoreSDNode instead of "
88 "AtomicSDNode for unordered atomic loads and "
89 "stores respectively."),
90 cl::Hidden);
91
92/// Call this when the user attempts to do something unsupported, like
93/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
94/// report_fatal_error, so calling code should attempt to recover without
95/// crashing.
///
/// Builds a DiagnosticInfoUnsupported for the function of the DAG's machine
/// function and hands it to the DAG context's diagnose().
///
/// \param DAG Selection DAG that supplies the machine function and context.
/// \param dl  Location whose debug location is attached to the diagnostic.
/// \param Msg Message text passed to the diagnostic.
96static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
97 const char *Msg) {
98 MachineFunction &MF = DAG.getMachineFunction();
99 DAG.getContext()->diagnose(
100 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
101}
102
103X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
104 const X86Subtarget &STI)
105 : TargetLowering(TM), Subtarget(STI) {
106 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
107 X86ScalarSSEf64 = Subtarget.hasSSE2();
108 X86ScalarSSEf32 = Subtarget.hasSSE1();
109 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
110
111 // Set up the TargetLowering object.
112
113 // X86 is weird. It always uses i8 for shift amounts and setcc results.
114 setBooleanContents(ZeroOrOneBooleanContent);
115 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
116 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
117
118 // For 64-bit, since we have so many registers, use the ILP scheduler.
119 // For 32-bit, use the register pressure specific scheduling.
120 // For Atom, always use ILP scheduling.
121 if (Subtarget.isAtom())
122 setSchedulingPreference(Sched::ILP);
123 else if (Subtarget.is64Bit())
124 setSchedulingPreference(Sched::ILP);
125 else
126 setSchedulingPreference(Sched::RegPressure);
127 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
128 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
129
130 // Bypass expensive divides and use cheaper ones.
131 if (TM.getOptLevel() >= CodeGenOpt::Default) {
132 if (Subtarget.hasSlowDivide32())
133 addBypassSlowDiv(32, 8);
134 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
135 addBypassSlowDiv(64, 32);
136 }
137
138 // Setup Windows compiler runtime calls.
139 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
140 static const struct {
141 const RTLIB::Libcall Op;
142 const char * const Name;
143 const CallingConv::ID CC;
144 } LibraryCalls[] = {
145 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
146 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
147 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
148 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
149 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
150 };
151
152 for (const auto &LC : LibraryCalls) {
153 setLibcallName(LC.Op, LC.Name);
154 setLibcallCallingConv(LC.Op, LC.CC);
155 }
156 }
157
158 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
159 // MSVCRT doesn't have powi; fall back to pow
160 setLibcallName(RTLIB::POWI_F32, nullptr);
161 setLibcallName(RTLIB::POWI_F64, nullptr);
162 }
163
164 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
165 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
166 // FIXME: Should we be limiting the atomic size on other configs? Default is
167 // 1024.
168 if (!Subtarget.hasCmpxchg8b())
169 setMaxAtomicSizeInBitsSupported(32);
170
171 // Set up the register classes.
172 addRegisterClass(MVT::i8, &X86::GR8RegClass);
173 addRegisterClass(MVT::i16, &X86::GR16RegClass);
174 addRegisterClass(MVT::i32, &X86::GR32RegClass);
175 if (Subtarget.is64Bit())
176 addRegisterClass(MVT::i64, &X86::GR64RegClass);
177
178 for (MVT VT : MVT::integer_valuetypes())
179 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
180
181 // We don't accept any truncstore of integer registers.
182 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
183 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
184 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
185 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
186 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
187 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
188
189 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
190
191 // SETOEQ and SETUNE require checking two conditions.
192 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
193 setCondCodeAction(ISD::SETOEQ, VT, Expand);
194 setCondCodeAction(ISD::SETUNE, VT, Expand);
195 }
196
197 // Integer absolute.
198 if (Subtarget.hasCMov()) {
199 setOperationAction(ISD::ABS , MVT::i16 , Custom);
200 setOperationAction(ISD::ABS , MVT::i32 , Custom);
201 if (Subtarget.is64Bit())
202 setOperationAction(ISD::ABS , MVT::i64 , Custom);
203 }
204
205 // Funnel shifts.
206 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
207 // For slow shld targets we only lower for code size.
208 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
209
210 setOperationAction(ShiftOp , MVT::i8 , Custom);
211 setOperationAction(ShiftOp , MVT::i16 , Custom);
212 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
213 if (Subtarget.is64Bit())
214 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
215 }
216
217 if (!Subtarget.useSoftFloat()) {
218 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
219 // operation.
220 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
221 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
222 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
223 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
224 // We have an algorithm for SSE2, and we turn this into a 64-bit
225 // FILD or VCVTUSI2SS/SD for other targets.
226 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
227 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
228 // We have an algorithm for SSE2->double, and we turn this into a
229 // 64-bit FILD followed by conditional FADD for other targets.
230 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
231 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
232
233 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
234 // this operation.
235 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
236 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
237 // SSE has no i16 to fp conversion, only i32. We promote in the handler
238 // to allow f80 to use i16 and f64 to use i16 with sse1 only
239 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
240 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
241 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
242 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
243 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
244 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
245 // are Legal, f80 is custom lowered.
246 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
247 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
248
249 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
250 // this operation.
251 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
252 // FIXME: This doesn't generate invalid exception when it should. PR44019.
253 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
254 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
255 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
256 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
257 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
258 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
259 // are Legal, f80 is custom lowered.
260 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
261 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
262
263 // Handle FP_TO_UINT by promoting the destination to a larger signed
264 // conversion.
265 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
266 // FIXME: This doesn't generate invalid exception when it should. PR44019.
267 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
268 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
269 // FIXME: This doesn't generate invalid exception when it should. PR44019.
270 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
271 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
272 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
273 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
274 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
275
276 setOperationAction(ISD::LRINT, MVT::f32, Custom);
277 setOperationAction(ISD::LRINT, MVT::f64, Custom);
278 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
279 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
280
281 if (!Subtarget.is64Bit()) {
282 setOperationAction(ISD::LRINT, MVT::i64, Custom);
283 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
284 }
285 }
286
287 // Handle address space casts between mixed sized pointers.
288 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
289 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
290
291 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
292 if (!X86ScalarSSEf64) {
293 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
294 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
295 if (Subtarget.is64Bit()) {
296 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
297 // Without SSE, i64->f64 goes through memory.
298 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
299 }
300 } else if (!Subtarget.is64Bit())
301 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
302
303 // Scalar integer divide and remainder are lowered to use operations that
304 // produce two results, to match the available instructions. This exposes
305 // the two-result form to trivial CSE, which is able to combine x/y and x%y
306 // into a single instruction.
307 //
308 // Scalar integer multiply-high is also lowered to use two-result
309 // operations, to match the available instructions. However, plain multiply
310 // (low) operations are left as Legal, as there are single-result
311 // instructions for this in x86. Using the two-result multiply instructions
312 // when both high and low results are needed must be arranged by dagcombine.
313 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
314 setOperationAction(ISD::MULHS, VT, Expand);
315 setOperationAction(ISD::MULHU, VT, Expand);
316 setOperationAction(ISD::SDIV, VT, Expand);
317 setOperationAction(ISD::UDIV, VT, Expand);
318 setOperationAction(ISD::SREM, VT, Expand);
319 setOperationAction(ISD::UREM, VT, Expand);
320 }
321
322 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
323 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
324 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
325 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
326 setOperationAction(ISD::BR_CC, VT, Expand);
327 setOperationAction(ISD::SELECT_CC, VT, Expand);
328 }
329 if (Subtarget.is64Bit())
330 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
331 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
332 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
333 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
334
335 setOperationAction(ISD::FREM , MVT::f32 , Expand);
336 setOperationAction(ISD::FREM , MVT::f64 , Expand);
337 setOperationAction(ISD::FREM , MVT::f80 , Expand);
338 setOperationAction(ISD::FREM , MVT::f128 , Expand);
339 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
340
341 // Promote the i8 variants and force them on up to i32 which has a shorter
342 // encoding.
343 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
344 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
345 if (!Subtarget.hasBMI()) {
346 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
347 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
348 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
349 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
350 if (Subtarget.is64Bit()) {
351 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
352 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
353 }
354 }
355
356 if (Subtarget.hasLZCNT()) {
357 // When promoting the i8 variants, force them to i32 for a shorter
358 // encoding.
359 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
360 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
361 } else {
362 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
363 if (VT == MVT::i64 && !Subtarget.is64Bit())
364 continue;
365 setOperationAction(ISD::CTLZ , VT, Custom);
366 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
367 }
368 }
369
370 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
371 ISD::STRICT_FP_TO_FP16}) {
372 // Special handling for half-precision floating point conversions.
373 // If we don't have F16C support, then lower half float conversions
374 // into library calls.
375 setOperationAction(
376 Op, MVT::f32,
377 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
378 // There's never any support for operations beyond MVT::f32.
379 setOperationAction(Op, MVT::f64, Expand);
380 setOperationAction(Op, MVT::f80, Expand);
381 setOperationAction(Op, MVT::f128, Expand);
382 }
383
384 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
385 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
386 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
387 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
388 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
389 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
390 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
391 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
392
393 setOperationAction(ISD::PARITY, MVT::i8, Custom);
394 if (Subtarget.hasPOPCNT()) {
395 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
396 } else {
397 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
398 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
399 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
400 if (Subtarget.is64Bit())
401 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
402 else
403 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
404
405 setOperationAction(ISD::PARITY, MVT::i16, Custom);
406 setOperationAction(ISD::PARITY, MVT::i32, Custom);
407 if (Subtarget.is64Bit())
408 setOperationAction(ISD::PARITY, MVT::i64, Custom);
409 }
410
411 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
412
413 if (!Subtarget.hasMOVBE())
414 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
415
416 // X86 wants to expand cmov itself.
417 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
418 setOperationAction(ISD::SELECT, VT, Custom);
419 setOperationAction(ISD::SETCC, VT, Custom);
420 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
421 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
422 }
423 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
424 if (VT == MVT::i64 && !Subtarget.is64Bit())
425 continue;
426 setOperationAction(ISD::SELECT, VT, Custom);
427 setOperationAction(ISD::SETCC, VT, Custom);
428 }
429
430 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
431 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
432 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
433
434 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
435 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
436 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
437 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
438 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
439 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
440 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
441 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
442
443 // Darwin ABI issue.
444 for (auto VT : { MVT::i32, MVT::i64 }) {
445 if (VT == MVT::i64 && !Subtarget.is64Bit())
446 continue;
447 setOperationAction(ISD::ConstantPool , VT, Custom);
448 setOperationAction(ISD::JumpTable , VT, Custom);
449 setOperationAction(ISD::GlobalAddress , VT, Custom);
450 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
451 setOperationAction(ISD::ExternalSymbol , VT, Custom);
452 setOperationAction(ISD::BlockAddress , VT, Custom);
453 }
454
455 // 64-bit shl, sra, srl (iff 32-bit x86)
456 for (auto VT : { MVT::i32, MVT::i64 }) {
457 if (VT == MVT::i64 && !Subtarget.is64Bit())
458 continue;
459 setOperationAction(ISD::SHL_PARTS, VT, Custom);
460 setOperationAction(ISD::SRA_PARTS, VT, Custom);
461 setOperationAction(ISD::SRL_PARTS, VT, Custom);
462 }
463
464 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
465 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
466
467 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
468
469 // Expand certain atomics
470 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
471 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
472 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
473 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
474 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
477 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
478 }
479
480 if (!Subtarget.is64Bit())
481 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
482
483 if (Subtarget.hasCmpxchg16b()) {
484 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
485 }
486
487 // FIXME - use subtarget debug flags
488 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
489 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
490 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
491 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
492 }
493
494 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
495 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
496
497 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
498 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
499
500 setOperationAction(ISD::TRAP, MVT::Other, Legal);
501 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
502 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
503
504 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
505 setOperationAction(ISD::VASTART , MVT::Other, Custom);
506 setOperationAction(ISD::VAEND , MVT::Other, Expand);
507 bool Is64Bit = Subtarget.is64Bit();
508 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
509 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
510
511 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
512 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
513
514 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
515
516 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
517 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
518 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
519
520 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
521 // f32 and f64 use SSE.
522 // Set up the FP register classes.
523 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
524 : &X86::FR32RegClass);
525 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
526 : &X86::FR64RegClass);
527
528 // Disable f32->f64 extload as we can only generate this in one instruction
529 // under optsize. So its easier to pattern match (fpext (load)) for that
530 // case instead of needing to emit 2 instructions for extload in the
531 // non-optsize case.
532 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
533
534 for (auto VT : { MVT::f32, MVT::f64 }) {
535 // Use ANDPD to simulate FABS.
536 setOperationAction(ISD::FABS, VT, Custom);
537
538 // Use XORP to simulate FNEG.
539 setOperationAction(ISD::FNEG, VT, Custom);
540
541 // Use ANDPD and ORPD to simulate FCOPYSIGN.
542 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
543
544 // These might be better off as horizontal vector ops.
545 setOperationAction(ISD::FADD, VT, Custom);
546 setOperationAction(ISD::FSUB, VT, Custom);
547
548 // We don't support sin/cos/fmod
549 setOperationAction(ISD::FSIN , VT, Expand);
550 setOperationAction(ISD::FCOS , VT, Expand);
551 setOperationAction(ISD::FSINCOS, VT, Expand);
552 }
553
554 // Lower this to MOVMSK plus an AND.
555 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
556 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
557
558 } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
559 (UseX87 || Is64Bit)) {
560 // Use SSE for f32, x87 for f64.
561 // Set up the FP register classes.
562 addRegisterClass(MVT::f32, &X86::FR32RegClass);
563 if (UseX87)
564 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
565
566 // Use ANDPS to simulate FABS.
567 setOperationAction(ISD::FABS , MVT::f32, Custom);
568
569 // Use XORP to simulate FNEG.
570 setOperationAction(ISD::FNEG , MVT::f32, Custom);
571
572 if (UseX87)
573 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
574
575 // Use ANDPS and ORPS to simulate FCOPYSIGN.
576 if (UseX87)
577 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
578 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
579
580 // We don't support sin/cos/fmod
581 setOperationAction(ISD::FSIN , MVT::f32, Expand);
582 setOperationAction(ISD::FCOS , MVT::f32, Expand);
583 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
584
585 if (UseX87) {
586 // Always expand sin/cos functions even though x87 has an instruction.
587 setOperationAction(ISD::FSIN, MVT::f64, Expand);
588 setOperationAction(ISD::FCOS, MVT::f64, Expand);
589 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
590 }
591 } else if (UseX87) {
592 // f32 and f64 in x87.
593 // Set up the FP register classes.
594 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
595 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
596
597 for (auto VT : { MVT::f32, MVT::f64 }) {
598 setOperationAction(ISD::UNDEF, VT, Expand);
599 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
600
601 // Always expand sin/cos functions even though x87 has an instruction.
602 setOperationAction(ISD::FSIN , VT, Expand);
603 setOperationAction(ISD::FCOS , VT, Expand);
604 setOperationAction(ISD::FSINCOS, VT, Expand);
605 }
606 }
607
608 // Expand FP32 immediates into loads from the stack, save special cases.
609 if (isTypeLegal(MVT::f32)) {
610 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
611 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
612 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
613 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
614 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
615 } else // SSE immediates.
616 addLegalFPImmediate(APFloat(+0.0f)); // xorps
617 }
618 // Expand FP64 immediates into loads from the stack, save special cases.
619 if (isTypeLegal(MVT::f64)) {
620 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
621 addLegalFPImmediate(APFloat(+0.0)); // FLD0
622 addLegalFPImmediate(APFloat(+1.0)); // FLD1
623 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
624 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
625 } else // SSE immediates.
626 addLegalFPImmediate(APFloat(+0.0)); // xorpd
627 }
628 // Handle constrained floating-point operations of scalar.
629 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
630 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
631 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
632 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
633 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
634 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
635 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
636 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
637 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
638 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
639 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
640 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
641 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
642
643 // We don't support FMA.
644 setOperationAction(ISD::FMA, MVT::f64, Expand);
645 setOperationAction(ISD::FMA, MVT::f32, Expand);
646
647 // f80 always uses X87.
648 if (UseX87) {
649 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
650 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
651 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
652 {
653 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
654 addLegalFPImmediate(TmpFlt); // FLD0
655 TmpFlt.changeSign();
656 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
657
658 bool ignored;
659 APFloat TmpFlt2(+1.0);
660 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
661 &ignored);
662 addLegalFPImmediate(TmpFlt2); // FLD1
663 TmpFlt2.changeSign();
664 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
665 }
666
667 // Always expand sin/cos functions even though x87 has an instruction.
668 setOperationAction(ISD::FSIN , MVT::f80, Expand);
669 setOperationAction(ISD::FCOS , MVT::f80, Expand);
670 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
671
672 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
673 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
674 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
675 setOperationAction(ISD::FRINT, MVT::f80, Expand);
676 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
677 setOperationAction(ISD::FMA, MVT::f80, Expand);
678 setOperationAction(ISD::LROUND, MVT::f80, Expand);
679 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
680 setOperationAction(ISD::LRINT, MVT::f80, Custom);
681 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
682
683 // Handle constrained floating-point operations of scalar.
684 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
685 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
686 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
687 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
688 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
689 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
690 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
691 // as Custom.
692 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
693 }
694
695 // f128 uses xmm registers, but most operations require libcalls.
696 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
697 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
698 : &X86::VR128RegClass);
699
700 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
701
702 setOperationAction(ISD::FADD, MVT::f128, LibCall);
703 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
704 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
705 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
706 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
707 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
708 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
709 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
710 setOperationAction(ISD::FMA, MVT::f128, LibCall);
711 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
712
713 setOperationAction(ISD::FABS, MVT::f128, Custom);
714 setOperationAction(ISD::FNEG, MVT::f128, Custom);
715 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
716
717 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
718 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
719 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
720 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
721 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
722 // No STRICT_FSINCOS
723 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
724 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
725
726 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
727 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
728 // We need to custom handle any FP_ROUND with an f128 input, but
729 // LegalizeDAG uses the result type to know when to run a custom handler.
730 // So we have to list all legal floating point result types here.
731 if (isTypeLegal(MVT::f32)) {
732 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
733 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
734 }
735 if (isTypeLegal(MVT::f64)) {
736 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
737 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
738 }
739 if (isTypeLegal(MVT::f80)) {
740 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
741 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
742 }
743
744 setOperationAction(ISD::SETCC, MVT::f128, Custom);
745
746 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
747 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
748 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
749 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
750 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
751 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
752 }
753
754 // Always use a library call for pow.
755 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
756 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
757 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
758 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
759
760 setOperationAction(ISD::FLOG, MVT::f80, Expand);
761 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
762 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
763 setOperationAction(ISD::FEXP, MVT::f80, Expand);
764 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
765 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
766 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
767
768 // Some FP actions are always expanded for vector types.
769 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
770 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
771 setOperationAction(ISD::FSIN, VT, Expand);
772 setOperationAction(ISD::FSINCOS, VT, Expand);
773 setOperationAction(ISD::FCOS, VT, Expand);
774 setOperationAction(ISD::FREM, VT, Expand);
775 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
776 setOperationAction(ISD::FPOW, VT, Expand);
777 setOperationAction(ISD::FLOG, VT, Expand);
778 setOperationAction(ISD::FLOG2, VT, Expand);
779 setOperationAction(ISD::FLOG10, VT, Expand);
780 setOperationAction(ISD::FEXP, VT, Expand);
781 setOperationAction(ISD::FEXP2, VT, Expand);
782 }
783
784 // First set operation action for all vector types to either promote
785 // (for widening) or expand (for scalarization). Then we will selectively
786 // turn on ones that can be effectively codegen'd.
787 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
788 setOperationAction(ISD::SDIV, VT, Expand);
789 setOperationAction(ISD::UDIV, VT, Expand);
790 setOperationAction(ISD::SREM, VT, Expand);
791 setOperationAction(ISD::UREM, VT, Expand);
792 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
793 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
794 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
795 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
796 setOperationAction(ISD::FMA, VT, Expand);
797 setOperationAction(ISD::FFLOOR, VT, Expand);
798 setOperationAction(ISD::FCEIL, VT, Expand);
799 setOperationAction(ISD::FTRUNC, VT, Expand);
800 setOperationAction(ISD::FRINT, VT, Expand);
801 setOperationAction(ISD::FNEARBYINT, VT, Expand);
802 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
803 setOperationAction(ISD::MULHS, VT, Expand);
804 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
805 setOperationAction(ISD::MULHU, VT, Expand);
806 setOperationAction(ISD::SDIVREM, VT, Expand);
807 setOperationAction(ISD::UDIVREM, VT, Expand);
808 setOperationAction(ISD::CTPOP, VT, Expand);
809 setOperationAction(ISD::CTTZ, VT, Expand);
810 setOperationAction(ISD::CTLZ, VT, Expand);
811 setOperationAction(ISD::ROTL, VT, Expand);
812 setOperationAction(ISD::ROTR, VT, Expand);
813 setOperationAction(ISD::BSWAP, VT, Expand);
814 setOperationAction(ISD::SETCC, VT, Expand);
815 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
816 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
817 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
818 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
819 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
820 setOperationAction(ISD::TRUNCATE, VT, Expand);
821 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
822 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
823 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
824 setOperationAction(ISD::SELECT_CC, VT, Expand);
825 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
826 setTruncStoreAction(InnerVT, VT, Expand);
827
828 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
829 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
830
831 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
832 // types, we have to deal with them whether we ask for Expansion or not.
833 // Setting Expand causes its own optimisation problems though, so leave
834 // them legal.
835 if (VT.getVectorElementType() == MVT::i1)
836 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
837
838 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
839 // split/scalarized right now.
840 if (VT.getVectorElementType() == MVT::f16)
841 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
842 }
843 }
844
845 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
846 // with -msoft-float, disable use of MMX as well.
847 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
848 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
849 // No operations on x86mmx supported, everything uses intrinsics.
850 }
851
852 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
853 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
854 : &X86::VR128RegClass);
855
856 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
857 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
858 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
859 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
860 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
861 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
862 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
863 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
864
865 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
866 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
867
868 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
869 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
870 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
871 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
872 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
873 }
874
875 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
876 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
877 : &X86::VR128RegClass);
878
879 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
880 // registers cannot be used even for integer operations.
881 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
882 : &X86::VR128RegClass);
883 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
884 : &X86::VR128RegClass);
885 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
886 : &X86::VR128RegClass);
887 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
888 : &X86::VR128RegClass);
889
890 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
891 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
892 setOperationAction(ISD::SDIV, VT, Custom);
893 setOperationAction(ISD::SREM, VT, Custom);
894 setOperationAction(ISD::UDIV, VT, Custom);
895 setOperationAction(ISD::UREM, VT, Custom);
896 }
897
898 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
899 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
900 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
901
902 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
903 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
904 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
905 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
906 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
907 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
908 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
909 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
910 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
911 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
912 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
913 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
914 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
915
916 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
917 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
918 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
919 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
920 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
921 }
922
923 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
924 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
925 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
926 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
927 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
928 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
929 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
930 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
931 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
932 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
933
934 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
935 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
936 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
937
938 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
939 setOperationAction(ISD::SETCC, VT, Custom);
940 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
941 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
942 setOperationAction(ISD::CTPOP, VT, Custom);
943 setOperationAction(ISD::ABS, VT, Custom);
944
945 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
946 // setcc all the way to isel and prefer SETGT in some isel patterns.
947 setCondCodeAction(ISD::SETLT, VT, Custom);
948 setCondCodeAction(ISD::SETLE, VT, Custom);
949 }
950
951 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
952 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
953 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
954 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
955 setOperationAction(ISD::VSELECT, VT, Custom);
956 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
957 }
958
959 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
960 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
961 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
962 setOperationAction(ISD::VSELECT, VT, Custom);
963
964 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
965 continue;
966
967 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
968 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
969 }
970
971 // Custom lower selects for v2f64 and the 128-bit integer vector types.
972 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
973 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
974 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
975 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
976 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
977
978 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
979 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
980 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
981 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
982
983 // Custom legalize these to avoid over promotion or custom promotion.
984 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
985 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
986 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
987 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
988 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
989 }
990
991 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
992 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
993 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
994 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
995
996 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
997 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
998
999 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1000 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1001
1002 // Fast v2f32 SINT_TO_FP/UINT_TO_FP( v2i32 ) custom conversion.
1003 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1004 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1005 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1006 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1007
1008 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1009 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1010 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1011 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1012
1013 // We want to legalize this to an f64 load rather than an i64 load on
1014 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1015 // store.
1016 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1017 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1018 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1019 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1020 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1021 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1022
1023 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1024 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1025 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1026 if (!Subtarget.hasAVX512())
1027 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1028
1029 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1030 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1031 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1032
1033 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1034
1035 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1036 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1037 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1038 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1039 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1040 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1041
1042 // In the customized shift lowering, the legal v4i32/v2i64 cases
1043 // in AVX2 will be recognized.
1044 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1045 setOperationAction(ISD::SRL, VT, Custom);
1046 setOperationAction(ISD::SHL, VT, Custom);
1047 setOperationAction(ISD::SRA, VT, Custom);
1048 }
1049
1050 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1051 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1052
1053 // With 512-bit registers or AVX512VL+BW, expanding (and promoting the
1054 // shifts) is better.
1055 if (!Subtarget.useAVX512Regs() &&
1056 !(Subtarget.hasBWI() && Subtarget.hasVLX()))
1057 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1058
1059 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1060 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1061 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1062 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1063 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1064 }
1065
1066 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1067 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1068 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1069 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1070 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1071 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1072 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1073 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1074 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1075
1076 // These might be better off as horizontal vector ops.
1077 setOperationAction(ISD::ADD, MVT::i16, Custom);
1078 setOperationAction(ISD::ADD, MVT::i32, Custom);
1079 setOperationAction(ISD::SUB, MVT::i16, Custom);
1080 setOperationAction(ISD::SUB, MVT::i32, Custom);
1081 }
1082
1083 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1084 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1085 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1086 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1087 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1088 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1089 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1090 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1091 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1092 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1093 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1094 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1095 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1096 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1097
1098 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1099 }
1100
1101 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1102 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1103 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1104 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1105 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1106 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1107 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1108 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1109
1110 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1111
1112 // FIXME: Do we need to handle scalar-to-vector here?
1113 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1114
1115 // We directly match byte blends in the backend as they match the VSELECT
1116 // condition form.
1117 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1118
1119 // SSE41 brings specific instructions for doing vector sign extend even in
1120 // cases where we don't have SRA.
1121 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1122 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1123 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1124 }
1125
1126 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1127 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1128 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1129 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1130 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1131 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1132 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1133 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1134 }
1135
1136 // i8 vectors are custom because the source register and source
1137 // memory operand types are not the same width.
1138 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1139
1140 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1141 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1142 // do the pre and post work in the vector domain.
1143 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1144 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1145 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1146 // so that DAG combine doesn't try to turn it into uint_to_fp.
1147 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1148 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1149 }
1150 }
1151
1152 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1153 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1154 }
1155
1156 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1157 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1158 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1159 setOperationAction(ISD::ROTL, VT, Custom);
1160
1161 // XOP can efficiently perform BITREVERSE with VPPERM.
1162 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1163 setOperationAction(ISD::BITREVERSE, VT, Custom);
1164
1165 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1166 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1167 setOperationAction(ISD::BITREVERSE, VT, Custom);
1168 }
1169
1170 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1171 bool HasInt256 = Subtarget.hasInt256();
1172
1173 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1174 : &X86::VR256RegClass);
1175 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1176 : &X86::VR256RegClass);
1177 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1178 : &X86::VR256RegClass);
1179 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1180 : &X86::VR256RegClass);
1181 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1182 : &X86::VR256RegClass);
1183 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1184 : &X86::VR256RegClass);
1185
1186 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1187 setOperationAction(ISD::FFLOOR, VT, Legal);
1188 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1189 setOperationAction(ISD::FCEIL, VT, Legal);
1190 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1191 setOperationAction(ISD::FTRUNC, VT, Legal);
1192 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1193 setOperationAction(ISD::FRINT, VT, Legal);
1194 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1195 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1196 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1197 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1198 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1199
1200 setOperationAction(ISD::FROUND, VT, Custom);
1201
1202 setOperationAction(ISD::FNEG, VT, Custom);
1203 setOperationAction(ISD::FABS, VT, Custom);
1204 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1205 }
1206
1207 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1208 // even though v8i16 is a legal type.
1209 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1210 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1211 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1212 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1213 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1214 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
1215
1216 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1217 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1218
1219 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1220 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1221 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1222 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1223 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1224 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1225 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1226 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1227 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1228 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1229 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1230 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1231
1232 if (!Subtarget.hasAVX512())
1233 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1234
1235 // In the customized shift lowering, the legal v8i32/v4i64 cases
1236 // in AVX2 will be recognized.
1237 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1238 setOperationAction(ISD::SRL, VT, Custom);
1239 setOperationAction(ISD::SHL, VT, Custom);
1240 setOperationAction(ISD::SRA, VT, Custom);
1241 }
1242
1243 // These types need custom splitting if their input is a 128-bit vector.
1244 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1245 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1246 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1247 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1248
1249 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1250 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1251
1252 // With BWI, expanding (and promoting the shifts) is better.
1253 if (!Subtarget.useBWIRegs())
1254 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1255
1256 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1257 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1258 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1259 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1260 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1261 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1262
1263 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1264 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1265 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1266 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1267 }
1268
1269 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1270 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1271 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1272 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1273
1274 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1275 setOperationAction(ISD::SETCC, VT, Custom);
1276 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1277 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1278 setOperationAction(ISD::CTPOP, VT, Custom);
1279 setOperationAction(ISD::CTLZ, VT, Custom);
1280
1281 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1282 // setcc all the way to isel and prefer SETGT in some isel patterns.
1283 setCondCodeAction(ISD::SETLT, VT, Custom);
1284 setCondCodeAction(ISD::SETLE, VT, Custom);
1285 }
1286
1287 if (Subtarget.hasAnyFMA()) {
1288 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1289 MVT::v2f64, MVT::v4f64 }) {
1290 setOperationAction(ISD::FMA, VT, Legal);
1291 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1292 }
1293 }
1294
1295 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1296 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1297 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1298 }
1299
1300 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1301 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1302 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1303 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1304
1305 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1306 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1307 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1308 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1309 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1310 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1311
1312 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1313 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1314 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1315 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1316 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1317
1318 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1319 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1320 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1321 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1322 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1323 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1324 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1325 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1326 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1327 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1328 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1329 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1330
1331 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1332 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1333 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1334 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1335 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1336 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1337 }
1338
1339 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1340 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1341 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1342 }
1343
1344 if (HasInt256) {
1345 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1346 // when we have a 256bit-wide blend with immediate.
1347 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1348 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1349
1350 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1351 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1352 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1353 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1354 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1355 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1356 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1357 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1358 }
1359 }
1360
1361 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1362 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1363 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1364 setOperationAction(ISD::MSTORE, VT, Legal);
1365 }
1366
1367 // Extract subvector is special because the value type
1368 // (result) is 128-bit but the source is 256-bit wide.
1369 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1370 MVT::v4f32, MVT::v2f64 }) {
1371 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1372 }
1373
1374 // Custom lower several nodes for 256-bit types.
1375 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1376 MVT::v8f32, MVT::v4f64 }) {
1377 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1378 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1379 setOperationAction(ISD::VSELECT, VT, Custom);
1380 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1381 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1382 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1383 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1384 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1385 setOperationAction(ISD::STORE, VT, Custom);
1386 }
1387
1388 if (HasInt256) {
1389 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1390
1391 // Custom legalize 2x32 to get a little better code.
1392 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1393 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1394
1395 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1396 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1397 setOperationAction(ISD::MGATHER, VT, Custom);
1398 }
1399 }
1400
1401 // This block controls legalization of the mask vector sizes that are
1402 // available with AVX512. 512-bit vectors are in a separate block controlled
1403 // by useAVX512Regs.
1404 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1405 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1406 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1407 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1408 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1409 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1410
1411 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1412 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1413 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1414
1415 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1416 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1417 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1418 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1419 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1420 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1421 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1422 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1423 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1424 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1425 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1426 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1427
1428 // There is no byte sized k-register load or store without AVX512DQ.
1429 if (!Subtarget.hasDQI()) {
1430 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1431 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1432 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1433 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1434
1435 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1436 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1437 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1438 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1439 }
1440
1441 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1442 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1443 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1444 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1445 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1446 }
1447
1448 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1449 setOperationAction(ISD::ADD, VT, Custom);
1450 setOperationAction(ISD::SUB, VT, Custom);
1451 setOperationAction(ISD::MUL, VT, Custom);
1452 setOperationAction(ISD::UADDSAT, VT, Custom);
1453 setOperationAction(ISD::SADDSAT, VT, Custom);
1454 setOperationAction(ISD::USUBSAT, VT, Custom);
1455 setOperationAction(ISD::SSUBSAT, VT, Custom);
1456 setOperationAction(ISD::VSELECT, VT, Expand);
1457 }
1458
1459 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1460 setOperationAction(ISD::SETCC, VT, Custom);
1461 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1462 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1463 setOperationAction(ISD::SELECT, VT, Custom);
1464 setOperationAction(ISD::TRUNCATE, VT, Custom);
1465
1466 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1467 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1468 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1469 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1470 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1471 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1472 }
1473
1474 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1475 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1476 }
1477
1478 // This block controls legalization for 512-bit operations with 32/64 bit
1479 // elements. 512-bits can be disabled based on prefer-vector-width and
1480 // required-vector-width function attributes.
1481 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1482 bool HasBWI = Subtarget.hasBWI();
1483
1484 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1485 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1486 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1487 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1488 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1489 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1490
1491 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1492 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1493 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1494 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1495 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1496 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1497 if (HasBWI)
1498 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1499 }
1500
1501 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1502 setOperationAction(ISD::FNEG, VT, Custom);
1503 setOperationAction(ISD::FABS, VT, Custom);
1504 setOperationAction(ISD::FMA, VT, Legal);
1505 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1506 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1507 }
1508
1509 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1510 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1511 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1512 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1513 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1514 }
1515 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1516 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1517 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1518 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1519 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1520 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1521 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1522 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1523
1524 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1525 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1526 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1527 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1528 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1529 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1530 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1531 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1532 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1533 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1534 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1535 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1536
1537 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1538 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1539 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1540 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1541 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1542 if (HasBWI)
1543 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1544
1545 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1546 // to 512-bit rather than use the AVX2 instructions so that we can use
1547 // k-masks.
1548 if (!Subtarget.hasVLX()) {
1549 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1550 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1551 setOperationAction(ISD::MLOAD, VT, Custom);
1552 setOperationAction(ISD::MSTORE, VT, Custom);
1553 }
1554 }
1555
1556 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1557 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1558 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1559 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1560 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1561 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1562 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1563 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1564 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1565 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1566 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1567 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1568 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1569
1570 if (HasBWI) {
1571 // Extends from v64i1 masks to 512-bit vectors.
1572 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1573 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1574 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1575 }
1576
1577 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1578 setOperationAction(ISD::FFLOOR, VT, Legal);
1579 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1580 setOperationAction(ISD::FCEIL, VT, Legal);
1581 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1582 setOperationAction(ISD::FTRUNC, VT, Legal);
1583 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1584 setOperationAction(ISD::FRINT, VT, Legal);
1585 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1586 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1587 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1588 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1589 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1590
1591 setOperationAction(ISD::FROUND, VT, Custom);
1592 }
1593
1594 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1595 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1596 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1597 }
1598
1599 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1600 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1601 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1602 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1603
1604 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1605 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1606 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1607 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1608
1609 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1610 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1611 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1612 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1613 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1614 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1615
1616 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1617
1618 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1619 setOperationAction(ISD::SRL, VT, Custom);
1620 setOperationAction(ISD::SHL, VT, Custom);
1621 setOperationAction(ISD::SRA, VT, Custom);
1622 setOperationAction(ISD::SETCC, VT, Custom);
1623
1624 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1625 // setcc all the way to isel and prefer SETGT in some isel patterns.
1626 setCondCodeAction(ISD::SETLT, VT, Custom);
1627 setCondCodeAction(ISD::SETLE, VT, Custom);
1628 }
1629 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1630 setOperationAction(ISD::SMAX, VT, Legal);
1631 setOperationAction(ISD::UMAX, VT, Legal);
1632 setOperationAction(ISD::SMIN, VT, Legal);
1633 setOperationAction(ISD::UMIN, VT, Legal);
1634 setOperationAction(ISD::ABS, VT, Legal);
1635 setOperationAction(ISD::CTPOP, VT, Custom);
1636 setOperationAction(ISD::ROTL, VT, Custom);
1637 setOperationAction(ISD::ROTR, VT, Custom);
1638 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1639 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1640 }
1641
1642 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1643 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1644 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1645 setOperationAction(ISD::CTLZ, VT, Custom);
1646 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1647 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1648 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1649 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1650 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1651 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1652 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1653 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1654 }
1655
1656 if (Subtarget.hasDQI()) {
1657 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1658 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1659 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1660 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1661 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1662 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1663 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1664 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1665
1666 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1667 }
1668
1669 if (Subtarget.hasCDI()) {
1670 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1671 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1672 setOperationAction(ISD::CTLZ, VT, Legal);
1673 }
1674 } // Subtarget.hasCDI()
1675
1676 if (Subtarget.hasVPOPCNTDQ()) {
1677 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1678 setOperationAction(ISD::CTPOP, VT, Legal);
1679 }
1680
1681 // Extract subvector is special because the value type
1682 // (result) is 256-bit but the source is 512-bit wide.
1683 // 128-bit was made Legal under AVX1.
1684 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1685 MVT::v8f32, MVT::v4f64 })
1686 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1687
1688 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1689 MVT::v16f32, MVT::v8f64 }) {
1690 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1691 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1692 setOperationAction(ISD::SELECT, VT, Custom);
1693 setOperationAction(ISD::VSELECT, VT, Custom);
1694 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1695 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1696 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1697 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1698 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1699 }
1700
1701 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1702 setOperationAction(ISD::MLOAD, VT, Legal);
1703 setOperationAction(ISD::MSTORE, VT, Legal);
1704 setOperationAction(ISD::MGATHER, VT, Custom);
1705 setOperationAction(ISD::MSCATTER, VT, Custom);
1706 }
1707 if (HasBWI) {
1708 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1709 setOperationAction(ISD::MLOAD, VT, Legal);
1710 setOperationAction(ISD::MSTORE, VT, Legal);
1711 }
1712 } else {
1713 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1714 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1715 }
1716
1717 if (Subtarget.hasVBMI2()) {
1718 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1719 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1720 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1721 setOperationAction(ISD::FSHL, VT, Custom);
1722 setOperationAction(ISD::FSHR, VT, Custom);
1723 }
1724
1725 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1726 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1727 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1728 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1729 }
1730 }// useAVX512Regs
1731
1732 // This block controls legalization for operations that don't have
1733 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1734 // narrower widths.
1735 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1736 // These operations are handled on non-VLX by artificially widening in
1737 // isel patterns.
1738
1739 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1740 Subtarget.hasVLX() ? Legal : Custom);
1741 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1742 Subtarget.hasVLX() ? Legal : Custom);
1743 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1744 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1745 Subtarget.hasVLX() ? Legal : Custom);
1746 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1747 Subtarget.hasVLX() ? Legal : Custom);
1748 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1749 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1750 Subtarget.hasVLX() ? Legal : Custom);
1751 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1752 Subtarget.hasVLX() ? Legal : Custom);
1753 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1754 Subtarget.hasVLX() ? Legal : Custom);
1755 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1756 Subtarget.hasVLX() ? Legal : Custom);
1757
1758 if (Subtarget.hasDQI()) {
1759 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1760 // v2f32 UINT_TO_FP is already custom under SSE2.
1761 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1763, __PRETTY_FUNCTION__))
1762 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1763, __PRETTY_FUNCTION__))
1763 "Unexpected operation action!")((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1763, __PRETTY_FUNCTION__))
;
1764 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1765 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1766 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1767 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1768 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1769 }
1770
1771 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1772 setOperationAction(ISD::SMAX, VT, Legal);
1773 setOperationAction(ISD::UMAX, VT, Legal);
1774 setOperationAction(ISD::SMIN, VT, Legal);
1775 setOperationAction(ISD::UMIN, VT, Legal);
1776 setOperationAction(ISD::ABS, VT, Legal);
1777 }
1778
1779 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1780 setOperationAction(ISD::ROTL, VT, Custom);
1781 setOperationAction(ISD::ROTR, VT, Custom);
1782 }
1783
1784 // Custom legalize 2x32 to get a little better code.
1785 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1786 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1787
1788 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1789 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1790 setOperationAction(ISD::MSCATTER, VT, Custom);
1791
1792 if (Subtarget.hasDQI()) {
1793 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1794 setOperationAction(ISD::SINT_TO_FP, VT,
1795 Subtarget.hasVLX() ? Legal : Custom);
1796 setOperationAction(ISD::UINT_TO_FP, VT,
1797 Subtarget.hasVLX() ? Legal : Custom);
1798 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1799 Subtarget.hasVLX() ? Legal : Custom);
1800 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1801 Subtarget.hasVLX() ? Legal : Custom);
1802 setOperationAction(ISD::FP_TO_SINT, VT,
1803 Subtarget.hasVLX() ? Legal : Custom);
1804 setOperationAction(ISD::FP_TO_UINT, VT,
1805 Subtarget.hasVLX() ? Legal : Custom);
1806 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1807 Subtarget.hasVLX() ? Legal : Custom);
1808 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1809 Subtarget.hasVLX() ? Legal : Custom);
1810 setOperationAction(ISD::MUL, VT, Legal);
1811 }
1812 }
1813
1814 if (Subtarget.hasCDI()) {
1815 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1816 setOperationAction(ISD::CTLZ, VT, Legal);
1817 }
1818 } // Subtarget.hasCDI()
1819
1820 if (Subtarget.hasVPOPCNTDQ()) {
1821 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1822 setOperationAction(ISD::CTPOP, VT, Legal);
1823 }
1824 }
1825
1826 // This block controls legalization of v32i1/v64i1 which are available with
1827 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1828 // useBWIRegs.
1829 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1830 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1831 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1832
1833 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1834 setOperationAction(ISD::ADD, VT, Custom);
1835 setOperationAction(ISD::SUB, VT, Custom);
1836 setOperationAction(ISD::MUL, VT, Custom);
1837 setOperationAction(ISD::VSELECT, VT, Expand);
1838 setOperationAction(ISD::UADDSAT, VT, Custom);
1839 setOperationAction(ISD::SADDSAT, VT, Custom);
1840 setOperationAction(ISD::USUBSAT, VT, Custom);
1841 setOperationAction(ISD::SSUBSAT, VT, Custom);
1842
1843 setOperationAction(ISD::TRUNCATE, VT, Custom);
1844 setOperationAction(ISD::SETCC, VT, Custom);
1845 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1846 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1847 setOperationAction(ISD::SELECT, VT, Custom);
1848 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1849 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1850 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1851 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1852 }
1853
1854 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1855 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1856
1857 // Extends from v32i1 masks to 256-bit vectors.
1858 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1859 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1860 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1861
1862 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1863 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1864 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1865 }
1866
1867 // These operations are handled on non-VLX by artificially widening in
1868 // isel patterns.
1869 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1870
1871 if (Subtarget.hasBITALG()) {
1872 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1873 setOperationAction(ISD::CTPOP, VT, Legal);
1874 }
1875 }
1876
1877 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1878 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1879 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1880 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1881 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1882 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1883
1884 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1885 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1886 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1887 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1888 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1889
1890 if (Subtarget.hasBWI()) {
1891 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1892 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1893 }
1894
1895 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
1896 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
1897 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1898 }
1899
1900 if (Subtarget.hasAMXTILE()) {
1901 addRegisterClass(MVT::v256i32, &X86::TILERegClass);
1902 }
1903
1904 // We want to custom lower some of our intrinsics.
1905 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1906 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1907 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1908 if (!Subtarget.is64Bit()) {
1909 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1910 }
1911
1912 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1913 // handle type legalization for these operations here.
1914 //
1915 // FIXME: We really should do custom legalization for addition and
1916 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1917 // than generic legalization for 64-bit multiplication-with-overflow, though.
1918 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1919 if (VT == MVT::i64 && !Subtarget.is64Bit())
1920 continue;
1921 // Add/Sub/Mul with overflow operations are custom lowered.
1922 setOperationAction(ISD::SADDO, VT, Custom);
1923 setOperationAction(ISD::UADDO, VT, Custom);
1924 setOperationAction(ISD::SSUBO, VT, Custom);
1925 setOperationAction(ISD::USUBO, VT, Custom);
1926 setOperationAction(ISD::SMULO, VT, Custom);
1927 setOperationAction(ISD::UMULO, VT, Custom);
1928
1929 // Support carry in as value rather than glue.
1930 setOperationAction(ISD::ADDCARRY, VT, Custom);
1931 setOperationAction(ISD::SUBCARRY, VT, Custom);
1932 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1933 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
1934 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
1935 }
1936
1937 if (!Subtarget.is64Bit()) {
1938 // These libcalls are not available in 32-bit.
1939 setLibcallName(RTLIB::SHL_I128, nullptr);
1940 setLibcallName(RTLIB::SRL_I128, nullptr);
1941 setLibcallName(RTLIB::SRA_I128, nullptr);
1942 setLibcallName(RTLIB::MUL_I128, nullptr);
1943 }
1944
1945 // Combine sin / cos into _sincos_stret if it is available.
1946 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1947 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1948 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1949 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1950 }
1951
1952 if (Subtarget.isTargetWin64()) {
1953 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1954 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1955 setOperationAction(ISD::SREM, MVT::i128, Custom);
1956 setOperationAction(ISD::UREM, MVT::i128, Custom);
1957 }
1958
1959 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1960 // is. We should promote the value to 64-bits to solve this.
1961 // This is what the CRT headers do - `fmodf` is an inline header
1962 // function casting to f64 and calling `fmod`.
1963 if (Subtarget.is32Bit() &&
1964 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
1965 for (ISD::NodeType Op :
1966 {ISD::FCEIL, ISD::STRICT_FCEIL,
1967 ISD::FCOS, ISD::STRICT_FCOS,
1968 ISD::FEXP, ISD::STRICT_FEXP,
1969 ISD::FFLOOR, ISD::STRICT_FFLOOR,
1970 ISD::FREM, ISD::STRICT_FREM,
1971 ISD::FLOG, ISD::STRICT_FLOG,
1972 ISD::FLOG10, ISD::STRICT_FLOG10,
1973 ISD::FPOW, ISD::STRICT_FPOW,
1974 ISD::FSIN, ISD::STRICT_FSIN})
1975 if (isOperationExpand(Op, MVT::f32))
1976 setOperationAction(Op, MVT::f32, Promote);
1977
1978 // We have target-specific dag combine patterns for the following nodes:
1979 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1980 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1981 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
1982 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1983 setTargetDAGCombine(ISD::CONCAT_VECTORS);
1984 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1985 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1986 setTargetDAGCombine(ISD::BITCAST);
1987 setTargetDAGCombine(ISD::VSELECT);
1988 setTargetDAGCombine(ISD::SELECT);
1989 setTargetDAGCombine(ISD::SHL);
1990 setTargetDAGCombine(ISD::SRA);
1991 setTargetDAGCombine(ISD::SRL);
1992 setTargetDAGCombine(ISD::OR);
1993 setTargetDAGCombine(ISD::AND);
1994 setTargetDAGCombine(ISD::ADD);
1995 setTargetDAGCombine(ISD::FADD);
1996 setTargetDAGCombine(ISD::FSUB);
1997 setTargetDAGCombine(ISD::FNEG);
1998 setTargetDAGCombine(ISD::FMA);
1999 setTargetDAGCombine(ISD::STRICT_FMA);
2000 setTargetDAGCombine(ISD::FMINNUM);
2001 setTargetDAGCombine(ISD::FMAXNUM);
2002 setTargetDAGCombine(ISD::SUB);
2003 setTargetDAGCombine(ISD::LOAD);
2004 setTargetDAGCombine(ISD::MLOAD);
2005 setTargetDAGCombine(ISD::STORE);
2006 setTargetDAGCombine(ISD::MSTORE);
2007 setTargetDAGCombine(ISD::TRUNCATE);
2008 setTargetDAGCombine(ISD::ZERO_EXTEND);
2009 setTargetDAGCombine(ISD::ANY_EXTEND);
2010 setTargetDAGCombine(ISD::SIGN_EXTEND);
2011 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
2012 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
2013 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
2014 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
2015 setTargetDAGCombine(ISD::SINT_TO_FP);
2016 setTargetDAGCombine(ISD::UINT_TO_FP);
2017 setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
2018 setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
2019 setTargetDAGCombine(ISD::SETCC);
2020 setTargetDAGCombine(ISD::MUL);
2021 setTargetDAGCombine(ISD::XOR);
2022 setTargetDAGCombine(ISD::MSCATTER);
2023 setTargetDAGCombine(ISD::MGATHER);
2024 setTargetDAGCombine(ISD::FP16_TO_FP);
2025 setTargetDAGCombine(ISD::FP_EXTEND);
2026 setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
2027 setTargetDAGCombine(ISD::FP_ROUND);
2028
2029 computeRegisterProperties(Subtarget.getRegisterInfo());
2030
2031 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2032 MaxStoresPerMemsetOptSize = 8;
2033 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2034 MaxStoresPerMemcpyOptSize = 4;
2035 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2036 MaxStoresPerMemmoveOptSize = 4;
2037
2038 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2039 // that needs to benchmarked and balanced with the potential use of vector
2040 // load/store types (PR33329, PR33914).
2041 MaxLoadsPerMemcmp = 2;
2042 MaxLoadsPerMemcmpOptSize = 2;
2043
2044 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
2045 setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
2046
2047 // An out-of-order CPU can speculatively execute past a predictable branch,
2048 // but a conditional move could be stalled by an expensive earlier operation.
2049 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2050 EnableExtLdPromotion = true;
2051 setPrefFunctionAlignment(Align(16));
2052
2053 verifyIntrinsicTables();
2054
2055 // Default to having -disable-strictnode-mutation on
2056 IsStrictFPEnabled = true;
2057}
2058
2059// This has so far only been implemented for 64-bit MachO.
2060bool X86TargetLowering::useLoadStackGuardNode() const {
2061 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2062}
2063
2064bool X86TargetLowering::useStackGuardXorFP() const {
2065 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2066 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2067}
2068
2069SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2070 const SDLoc &DL) const {
2071 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2072 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2073 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2074 return SDValue(Node, 0);
2075}
2076
2077TargetLoweringBase::LegalizeTypeAction
2078X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2079 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2080 !Subtarget.hasBWI())
2081 return TypeSplitVector;
2082
2083 if (VT.getVectorNumElements() != 1 &&
2084 VT.getVectorElementType() != MVT::i1)
2085 return TypeWidenVector;
2086
2087 return TargetLoweringBase::getPreferredVectorAction(VT);
2088}
2089
2090static std::pair<MVT, unsigned>
2091handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2092 const X86Subtarget &Subtarget) {
2093 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2094 // convention is one that uses k registers.
2095 if (NumElts == 2)
2096 return {MVT::v2i64, 1};
2097 if (NumElts == 4)
2098 return {MVT::v4i32, 1};
2099 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2100 CC != CallingConv::Intel_OCL_BI)
2101 return {MVT::v8i16, 1};
2102 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2103 CC != CallingConv::Intel_OCL_BI)
2104 return {MVT::v16i8, 1};
2105 // v32i1 passes in ymm unless we have BWI and the calling convention is
2106 // regcall.
2107 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2108 return {MVT::v32i8, 1};
2109 // Split v64i1 vectors if we don't have v64i8 available.
2110 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2111 if (Subtarget.useAVX512Regs())
2112 return {MVT::v64i8, 1};
2113 return {MVT::v32i8, 2};
2114 }
2115
2116 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2117 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2118 NumElts > 64)
2119 return {MVT::i8, NumElts};
2120
2121 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2122}
2123
2124MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2125 CallingConv::ID CC,
2126 EVT VT) const {
2127 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2128 Subtarget.hasAVX512()) {
2129 unsigned NumElts = VT.getVectorNumElements();
2130
2131 MVT RegisterVT;
2132 unsigned NumRegisters;
2133 std::tie(RegisterVT, NumRegisters) =
2134 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2135 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2136 return RegisterVT;
2137 }
2138
2139 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2140}
2141
2142unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2143 CallingConv::ID CC,
2144 EVT VT) const {
2145 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2146 Subtarget.hasAVX512()) {
2147 unsigned NumElts = VT.getVectorNumElements();
2148
2149 MVT RegisterVT;
2150 unsigned NumRegisters;
2151 std::tie(RegisterVT, NumRegisters) =
2152 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2153 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2154 return NumRegisters;
2155 }
2156
2157 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2158}
2159
2160unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2161 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2162 unsigned &NumIntermediates, MVT &RegisterVT) const {
2163 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2164 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2165 Subtarget.hasAVX512() &&
2166 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2167 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2168 VT.getVectorNumElements() > 64)) {
2169 RegisterVT = MVT::i8;
2170 IntermediateVT = MVT::i1;
2171 NumIntermediates = VT.getVectorNumElements();
2172 return NumIntermediates;
2173 }
2174
2175 // Split v64i1 vectors if we don't have v64i8 available.
2176 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2177 CC != CallingConv::X86_RegCall) {
2178 RegisterVT = MVT::v32i8;
2179 IntermediateVT = MVT::v32i1;
2180 NumIntermediates = 2;
2181 return 2;
2182 }
2183
2184 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2185 NumIntermediates, RegisterVT);
2186}
2187
2188EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2189 LLVMContext& Context,
2190 EVT VT) const {
2191 if (!VT.isVector())
2192 return MVT::i8;
2193
2194 if (Subtarget.hasAVX512()) {
2195 const unsigned NumElts = VT.getVectorNumElements();
2196
2197 // Figure out what this type will be legalized to.
2198 EVT LegalVT = VT;
2199 while (getTypeAction(Context, LegalVT) != TypeLegal)
2200 LegalVT = getTypeToTransformTo(Context, LegalVT);
2201
2202 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2203 if (LegalVT.getSimpleVT().is512BitVector())
2204 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2205
2206 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2207 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2208 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2209 // vXi16/vXi8.
2210 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2211 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2212 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2213 }
2214 }
2215
2216 return VT.changeVectorElementTypeToInteger();
2217}
2218
2219/// Helper for getByValTypeAlignment to determine
2220/// the desired ByVal argument alignment.
2221static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2222 if (MaxAlign == 16)
2223 return;
2224 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2225 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2226 MaxAlign = Align(16);
2227 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2228 Align EltAlign;
2229 getMaxByValAlign(ATy->getElementType(), EltAlign);
2230 if (EltAlign > MaxAlign)
2231 MaxAlign = EltAlign;
2232 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2233 for (auto *EltTy : STy->elements()) {
2234 Align EltAlign;
2235 getMaxByValAlign(EltTy, EltAlign);
2236 if (EltAlign > MaxAlign)
2237 MaxAlign = EltAlign;
2238 if (MaxAlign == 16)
2239 break;
2240 }
2241 }
2242}
2243
2244/// Return the desired alignment for ByVal aggregate
2245/// function arguments in the caller parameter area. For X86, aggregates
2246/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2247/// are at 4-byte boundaries.
2248unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2249 const DataLayout &DL) const {
2250 if (Subtarget.is64Bit()) {
2251 // Max of 8 and alignment of type.
2252 Align TyAlign = DL.getABITypeAlign(Ty);
2253 if (TyAlign > 8)
2254 return TyAlign.value();
2255 return 8;
2256 }
2257
2258 Align Alignment(4);
2259 if (Subtarget.hasSSE1())
2260 getMaxByValAlign(Ty, Alignment);
2261 return Alignment.value();
2262}
2263
2264/// It returns EVT::Other if the type should be determined using generic
2265/// target-independent logic.
2266/// For vector ops we check that the overall size isn't larger than our
2267/// preferred vector width.
2268EVT X86TargetLowering::getOptimalMemOpType(
2269 const MemOp &Op, const AttributeList &FuncAttributes) const {
2270 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2271 if (Op.size() >= 16 &&
2272 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2273 // FIXME: Check if unaligned 64-byte accesses are slow.
2274 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2275 (Subtarget.getPreferVectorWidth() >= 512)) {
2276 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2277 }
2278 // FIXME: Check if unaligned 32-byte accesses are slow.
2279 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2280 (Subtarget.getPreferVectorWidth() >= 256)) {
2281 // Although this isn't a well-supported type for AVX1, we'll let
2282 // legalization and shuffle lowering produce the optimal codegen. If we
2283 // choose an optimal type with a vector element larger than a byte,
2284 // getMemsetStores() may create an intermediate splat (using an integer
2285 // multiply) before we splat as a vector.
2286 return MVT::v32i8;
2287 }
2288 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2289 return MVT::v16i8;
2290 // TODO: Can SSE1 handle a byte vector?
2291 // If we have SSE1 registers we should be able to use them.
2292 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2293 (Subtarget.getPreferVectorWidth() >= 128))
2294 return MVT::v4f32;
2295 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2296 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2297 // Do not use f64 to lower memcpy if source is string constant. It's
2298 // better to use i32 to avoid the loads.
2299 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2300 // The gymnastics of splatting a byte value into an XMM register and then
2301 // only using 8-byte stores (because this is a CPU with slow unaligned
2302 // 16-byte accesses) makes that a loser.
2303 return MVT::f64;
2304 }
2305 }
2306 // This is a compromise. If we reach here, unaligned accesses may be slow on
2307 // this target. However, creating smaller, aligned accesses could be even
2308 // slower and would certainly be a lot more code.
2309 if (Subtarget.is64Bit() && Op.size() >= 8)
2310 return MVT::i64;
2311 return MVT::i32;
2312}
2313
2314bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2315 if (VT == MVT::f32)
2316 return X86ScalarSSEf32;
2317 else if (VT == MVT::f64)
2318 return X86ScalarSSEf64;
2319 return true;
2320}
2321
2322bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2323 EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags,
2324 bool *Fast) const {
2325 if (Fast) {
2326 switch (VT.getSizeInBits()) {
2327 default:
2328 // 8-byte and under are always assumed to be fast.
2329 *Fast = true;
2330 break;
2331 case 128:
2332 *Fast = !Subtarget.isUnalignedMem16Slow();
2333 break;
2334 case 256:
2335 *Fast = !Subtarget.isUnalignedMem32Slow();
2336 break;
2337 // TODO: What about AVX-512 (512-bit) accesses?
2338 }
2339 }
2340 // NonTemporal vector memory ops must be aligned.
2341 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2342 // NT loads can only be vector aligned, so if its less aligned than the
2343 // minimum vector size (which we can split the vector down to), we might as
2344 // well use a regular unaligned vector load.
2345 // We don't have any NT loads pre-SSE41.
2346 if (!!(Flags & MachineMemOperand::MOLoad))
2347 return (Align < 16 || !Subtarget.hasSSE41());
2348 return false;
2349 }
2350 // Misaligned accesses of any size are always allowed.
2351 return true;
2352}
2353
2354/// Return the entry encoding for a jump table in the
2355/// current function. The returned value is a member of the
2356/// MachineJumpTableInfo::JTEntryKind enum.
2357unsigned X86TargetLowering::getJumpTableEncoding() const {
2358 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2359 // symbol.
2360 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2361 return MachineJumpTableInfo::EK_Custom32;
2362
2363 // Otherwise, use the normal jump table encoding heuristics.
2364 return TargetLowering::getJumpTableEncoding();
2365}
2366
2367bool X86TargetLowering::useSoftFloat() const {
2368 return Subtarget.useSoftFloat();
2369}
2370
2371void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2372 ArgListTy &Args) const {
2373
2374 // Only relabel X86-32 for C / Stdcall CCs.
2375 if (Subtarget.is64Bit())
2376 return;
2377 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2378 return;
2379 unsigned ParamRegs = 0;
2380 if (auto *M = MF->getFunction().getParent())
2381 ParamRegs = M->getNumberRegisterParameters();
2382
2383 // Mark the first N int arguments as having reg
2384 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2385 Type *T = Args[Idx].Ty;
2386 if (T->isIntOrPtrTy())
2387 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2388 unsigned numRegs = 1;
2389 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2390 numRegs = 2;
2391 if (ParamRegs < numRegs)
2392 return;
2393 ParamRegs -= numRegs;
2394 Args[Idx].IsInReg = true;
2395 }
2396 }
2397}
2398
2399const MCExpr *
2400X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2401 const MachineBasicBlock *MBB,
2402 unsigned uid,MCContext &Ctx) const{
2403 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2403, __PRETTY_FUNCTION__))
;
2404 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2405 // entries.
2406 return MCSymbolRefExpr::create(MBB->getSymbol(),
2407 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2408}
2409
2410/// Returns relocation base for the given PIC jumptable.
2411SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2412 SelectionDAG &DAG) const {
2413 if (!Subtarget.is64Bit())
2414 // This doesn't have SDLoc associated with it, but is not really the
2415 // same as a Register.
2416 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2417 getPointerTy(DAG.getDataLayout()));
2418 return Table;
2419}
2420
2421/// This returns the relocation base for the given PIC jumptable,
2422/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2423const MCExpr *X86TargetLowering::
2424getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2425 MCContext &Ctx) const {
2426 // X86-64 uses RIP relative addressing based on the jump table label.
2427 if (Subtarget.isPICStyleRIPRel())
2428 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2429
2430 // Otherwise, the reference is relative to the PIC base.
2431 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2432}
2433
2434std::pair<const TargetRegisterClass *, uint8_t>
2435X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2436 MVT VT) const {
2437 const TargetRegisterClass *RRC = nullptr;
2438 uint8_t Cost = 1;
2439 switch (VT.SimpleTy) {
2440 default:
2441 return TargetLowering::findRepresentativeClass(TRI, VT);
2442 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2443 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2444 break;
2445 case MVT::x86mmx:
2446 RRC = &X86::VR64RegClass;
2447 break;
2448 case MVT::f32: case MVT::f64:
2449 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2450 case MVT::v4f32: case MVT::v2f64:
2451 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2452 case MVT::v8f32: case MVT::v4f64:
2453 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2454 case MVT::v16f32: case MVT::v8f64:
2455 RRC = &X86::VR128XRegClass;
2456 break;
2457 }
2458 return std::make_pair(RRC, Cost);
2459}
2460
2461unsigned X86TargetLowering::getAddressSpace() const {
2462 if (Subtarget.is64Bit())
2463 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2464 return 256;
2465}
2466
2467static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2468 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2469 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2470}
2471
2472static Constant* SegmentOffset(IRBuilder<> &IRB,
2473 unsigned Offset, unsigned AddressSpace) {
2474 return ConstantExpr::getIntToPtr(
2475 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2476 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2477}
2478
2479Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2480 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2481 // tcbhead_t; use it instead of the usual global variable (see
2482 // sysdeps/{i386,x86_64}/nptl/tls.h)
2483 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2484 if (Subtarget.isTargetFuchsia()) {
2485 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2486 return SegmentOffset(IRB, 0x10, getAddressSpace());
2487 } else {
2488 unsigned AddressSpace = getAddressSpace();
2489 // Specially, some users may customize the base reg and offset.
2490 unsigned Offset = getTargetMachine().Options.StackProtectorGuardOffset;
2491 // If we don't set -stack-protector-guard-offset value:
2492 // %fs:0x28, unless we're using a Kernel code model, in which case
2493 // it's %gs:0x28. gs:0x14 on i386.
2494 if (Offset == (unsigned)-1)
2495 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2496
2497 auto GuardReg = getTargetMachine().Options.StackProtectorGuardReg;
2498 if (GuardReg == "fs")
2499 AddressSpace = X86AS::FS;
2500 else if (GuardReg == "gs")
2501 AddressSpace = X86AS::GS;
2502 return SegmentOffset(IRB, Offset, AddressSpace);
2503 }
2504 }
2505 return TargetLowering::getIRStackGuard(IRB);
2506}
2507
2508void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2509 // MSVC CRT provides functionalities for stack protection.
2510 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2511 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2512 // MSVC CRT has a global variable holding security cookie.
2513 M.getOrInsertGlobal("__security_cookie",
2514 Type::getInt8PtrTy(M.getContext()));
2515
2516 // MSVC CRT has a function to validate security cookie.
2517 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2518 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2519 Type::getInt8PtrTy(M.getContext()));
2520 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2521 F->setCallingConv(CallingConv::X86_FastCall);
2522 F->addAttribute(1, Attribute::AttrKind::InReg);
2523 }
2524 return;
2525 }
2526
2527 auto GuardMode = getTargetMachine().Options.StackProtectorGuard;
2528
2529 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2530 if ((GuardMode == llvm::StackProtectorGuards::TLS ||
2531 GuardMode == llvm::StackProtectorGuards::None)
2532 && hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2533 return;
2534 TargetLowering::insertSSPDeclarations(M);
2535}
2536
2537Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2538 // MSVC CRT has a global variable holding security cookie.
2539 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2540 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2541 return M.getGlobalVariable("__security_cookie");
2542 }
2543 return TargetLowering::getSDagStackGuard(M);
2544}
2545
2546Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2547 // MSVC CRT has a function to validate security cookie.
2548 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2549 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2550 return M.getFunction("__security_check_cookie");
2551 }
2552 return TargetLowering::getSSPStackGuardCheck(M);
2553}
2554
2555Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2556 if (Subtarget.getTargetTriple().isOSContiki())
2557 return getDefaultSafeStackPointerLocation(IRB, false);
2558
2559 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2560 // definition of TLS_SLOT_SAFESTACK in
2561 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2562 if (Subtarget.isTargetAndroid()) {
2563 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2564 // %gs:0x24 on i386
2565 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2566 return SegmentOffset(IRB, Offset, getAddressSpace());
2567 }
2568
2569 // Fuchsia is similar.
2570 if (Subtarget.isTargetFuchsia()) {
2571 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2572 return SegmentOffset(IRB, 0x18, getAddressSpace());
2573 }
2574
2575 return TargetLowering::getSafeStackPointerLocation(IRB);
2576}
2577
2578//===----------------------------------------------------------------------===//
2579// Return Value Calling Convention Implementation
2580//===----------------------------------------------------------------------===//
2581
2582bool X86TargetLowering::CanLowerReturn(
2583 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2584 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2585 SmallVector<CCValAssign, 16> RVLocs;
2586 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2587 return CCInfo.CheckReturn(Outs, RetCC_X86);
2588}
2589
2590const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2591 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2592 return ScratchRegs;
2593}
2594
2595/// Lowers masks values (v*i1) to the local register values
2596/// \returns DAG node after lowering to register type
2597static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2598 const SDLoc &Dl, SelectionDAG &DAG) {
2599 EVT ValVT = ValArg.getValueType();
2600
2601 if (ValVT == MVT::v1i1)
2602 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2603 DAG.getIntPtrConstant(0, Dl));
2604
2605 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2606 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2607 // Two stage lowering might be required
2608 // bitcast: v8i1 -> i8 / v16i1 -> i16
2609 // anyextend: i8 -> i32 / i16 -> i32
2610 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2611 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2612 if (ValLoc == MVT::i32)
2613 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2614 return ValToCopy;
2615 }
2616
2617 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2618 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2619 // One stage lowering is required
2620 // bitcast: v32i1 -> i32 / v64i1 -> i64
2621 return DAG.getBitcast(ValLoc, ValArg);
2622 }
2623
2624 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2625}
2626
2627/// Breaks v64i1 value into two registers and adds the new node to the DAG
2628static void Passv64i1ArgInRegs(
2629 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2630 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
2631 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2632 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")((Subtarget.hasBWI() && "Expected AVX512BW target!") ?
static_cast<void> (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2632, __PRETTY_FUNCTION__))
;
2633 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2633, __PRETTY_FUNCTION__))
;
2634 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2634, __PRETTY_FUNCTION__))
;
2635 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2636, __PRETTY_FUNCTION__))
2636 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2636, __PRETTY_FUNCTION__))
;
2637
2638 // Before splitting the value we cast it to i64
2639 Arg = DAG.getBitcast(MVT::i64, Arg);
2640
2641 // Splitting the value into two i32 types
2642 SDValue Lo, Hi;
2643 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2644 DAG.getConstant(0, Dl, MVT::i32));
2645 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2646 DAG.getConstant(1, Dl, MVT::i32));
2647
2648 // Attach the two i32 types into corresponding registers
2649 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2650 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2651}
2652
2653SDValue
2654X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2655 bool isVarArg,
2656 const SmallVectorImpl<ISD::OutputArg> &Outs,
2657 const SmallVectorImpl<SDValue> &OutVals,
2658 const SDLoc &dl, SelectionDAG &DAG) const {
2659 MachineFunction &MF = DAG.getMachineFunction();
2660 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2661
2662 // In some cases we need to disable registers from the default CSR list.
2663 // For example, when they are used for argument passing.
2664 bool ShouldDisableCalleeSavedRegister =
2665 CallConv == CallingConv::X86_RegCall ||
2666 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2667
2668 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2669 report_fatal_error("X86 interrupts may not return any value");
2670
2671 SmallVector<CCValAssign, 16> RVLocs;
2672 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2673 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2674
2675 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
2676 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2677 ++I, ++OutsIndex) {
2678 CCValAssign &VA = RVLocs[I];
2679 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2679, __PRETTY_FUNCTION__))
;
2680
2681 // Add the register to the CalleeSaveDisableRegs list.
2682 if (ShouldDisableCalleeSavedRegister)
2683 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2684
2685 SDValue ValToCopy = OutVals[OutsIndex];
2686 EVT ValVT = ValToCopy.getValueType();
2687
2688 // Promote values to the appropriate types.
2689 if (VA.getLocInfo() == CCValAssign::SExt)
2690 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2691 else if (VA.getLocInfo() == CCValAssign::ZExt)
2692 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2693 else if (VA.getLocInfo() == CCValAssign::AExt) {
2694 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2695 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2696 else
2697 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2698 }
2699 else if (VA.getLocInfo() == CCValAssign::BCvt)
2700 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2701
2702 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2703, __PRETTY_FUNCTION__))
2703 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2703, __PRETTY_FUNCTION__))
;
2704
2705 // Report an error if we have attempted to return a value via an XMM
2706 // register and SSE was disabled.
2707 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
2708 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2709 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2710 } else if (!Subtarget.hasSSE2() &&
2711 X86::FR64XRegClass.contains(VA.getLocReg()) &&
2712 ValVT == MVT::f64) {
2713 // When returning a double via an XMM register, report an error if SSE2 is
2714 // not enabled.
2715 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2716 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2717 }
2718
2719 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2720 // the RET instruction and handled by the FP Stackifier.
2721 if (VA.getLocReg() == X86::FP0 ||
2722 VA.getLocReg() == X86::FP1) {
2723 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2724 // change the value to the FP stack register class.
2725 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2726 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2727 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2728 // Don't emit a copytoreg.
2729 continue;
2730 }
2731
2732 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2733 // which is returned in RAX / RDX.
2734 if (Subtarget.is64Bit()) {
2735 if (ValVT == MVT::x86mmx) {
2736 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2737 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2738 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2739 ValToCopy);
2740 // If we don't have SSE2 available, convert to v4f32 so the generated
2741 // register is legal.
2742 if (!Subtarget.hasSSE2())
2743 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2744 }
2745 }
2746 }
2747
2748 if (VA.needsCustom()) {
2749 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2750, __PRETTY_FUNCTION__))
2750 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2750, __PRETTY_FUNCTION__))
;
2751
2752 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
2753 Subtarget);
2754
2755 // Add the second register to the CalleeSaveDisableRegs list.
2756 if (ShouldDisableCalleeSavedRegister)
2757 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2758 } else {
2759 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2760 }
2761 }
2762
2763 SDValue Flag;
2764 SmallVector<SDValue, 6> RetOps;
2765 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2766 // Operand #1 = Bytes To Pop
2767 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2768 MVT::i32));
2769
2770 // Copy the result values into the output registers.
2771 for (auto &RetVal : RetVals) {
2772 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
2773 RetOps.push_back(RetVal.second);
2774 continue; // Don't emit a copytoreg.
2775 }
2776
2777 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
2778 Flag = Chain.getValue(1);
2779 RetOps.push_back(
2780 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
2781 }
2782
2783 // Swift calling convention does not require we copy the sret argument
2784 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2785
2786 // All x86 ABIs require that for returning structs by value we copy
2787 // the sret argument into %rax/%eax (depending on ABI) for the return.
2788 // We saved the argument into a virtual register in the entry block,
2789 // so now we copy the value out and into %rax/%eax.
2790 //
2791 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2792 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2793 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2794 // either case FuncInfo->setSRetReturnReg() will have been called.
2795 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
2796 // When we have both sret and another return value, we should use the
2797 // original Chain stored in RetOps[0], instead of the current Chain updated
2798 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2799
2800 // For the case of sret and another return value, we have
2801 // Chain_0 at the function entry
2802 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2803 // If we use Chain_1 in getCopyFromReg, we will have
2804 // Val = getCopyFromReg(Chain_1)
2805 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2806
2807 // getCopyToReg(Chain_0) will be glued together with
2808 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2809 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2810 // Data dependency from Unit B to Unit A due to usage of Val in
2811 // getCopyToReg(Chain_1, Val)
2812 // Chain dependency from Unit A to Unit B
2813
2814 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2815 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2816 getPointerTy(MF.getDataLayout()));
2817
2818 Register RetValReg
2819 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2820 X86::RAX : X86::EAX;
2821 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2822 Flag = Chain.getValue(1);
2823
2824 // RAX/EAX now acts like a return value.
2825 RetOps.push_back(
2826 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2827
2828 // Add the returned register to the CalleeSaveDisableRegs list.
2829 if (ShouldDisableCalleeSavedRegister)
2830 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2831 }
2832
2833 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2834 const MCPhysReg *I =
2835 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2836 if (I) {
2837 for (; *I; ++I) {
2838 if (X86::GR64RegClass.contains(*I))
2839 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2840 else
2841 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2841)
;
2842 }
2843 }
2844
2845 RetOps[0] = Chain; // Update chain.
2846
2847 // Add the flag if we have it.
2848 if (Flag.getNode())
2849 RetOps.push_back(Flag);
2850
2851 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2852 if (CallConv == CallingConv::X86_INTR)
2853 opcode = X86ISD::IRET;
2854 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2855}
2856
2857bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2858 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2859 return false;
2860
2861 SDValue TCChain = Chain;
2862 SDNode *Copy = *N->use_begin();
2863 if (Copy->getOpcode() == ISD::CopyToReg) {
2864 // If the copy has a glue operand, we conservatively assume it isn't safe to
2865 // perform a tail call.
2866 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2867 return false;
2868 TCChain = Copy->getOperand(0);
2869 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2870 return false;
2871
2872 bool HasRet = false;
2873 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2874 UI != UE; ++UI) {
2875 if (UI->getOpcode() != X86ISD::RET_FLAG)
2876 return false;
2877 // If we are returning more than one value, we can definitely
2878 // not make a tail call see PR19530
2879 if (UI->getNumOperands() > 4)
2880 return false;
2881 if (UI->getNumOperands() == 4 &&
2882 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2883 return false;
2884 HasRet = true;
2885 }
2886
2887 if (!HasRet)
2888 return false;
2889
2890 Chain = TCChain;
2891 return true;
2892}
2893
2894EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2895 ISD::NodeType ExtendKind) const {
2896 MVT ReturnMVT = MVT::i32;
2897
2898 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2899 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2900 // The ABI does not require i1, i8 or i16 to be extended.
2901 //
2902 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2903 // always extending i8/i16 return values, so keep doing that for now.
2904 // (PR26665).
2905 ReturnMVT = MVT::i8;
2906 }
2907
2908 EVT MinVT = getRegisterType(Context, ReturnMVT);
2909 return VT.bitsLT(MinVT) ? MinVT : VT;
2910}
2911
2912/// Reads two 32 bit registers and creates a 64 bit mask value.
2913/// \param VA The current 32 bit value that need to be assigned.
2914/// \param NextVA The next 32 bit value that need to be assigned.
2915/// \param Root The parent DAG node.
2916/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2917/// glue purposes. In the case the DAG is already using
2918/// physical register instead of virtual, we should glue
2919/// our new SDValue to InFlag SDvalue.
2920/// \return a new SDvalue of size 64bit.
2921static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2922 SDValue &Root, SelectionDAG &DAG,
2923 const SDLoc &Dl, const X86Subtarget &Subtarget,
2924 SDValue *InFlag = nullptr) {
2925 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2925, __PRETTY_FUNCTION__))
;
2926 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2926, __PRETTY_FUNCTION__))
;
2927 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2928, __PRETTY_FUNCTION__))
2928 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2928, __PRETTY_FUNCTION__))
;
2929 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2930, __PRETTY_FUNCTION__))
2930 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2930, __PRETTY_FUNCTION__))
;
2931 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2932, __PRETTY_FUNCTION__))
2932 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2932, __PRETTY_FUNCTION__))
;
2933
2934 SDValue Lo, Hi;
2935 SDValue ArgValueLo, ArgValueHi;
2936
2937 MachineFunction &MF = DAG.getMachineFunction();
2938 const TargetRegisterClass *RC = &X86::GR32RegClass;
2939
2940 // Read a 32 bit value from the registers.
2941 if (nullptr == InFlag) {
2942 // When no physical register is present,
2943 // create an intermediate virtual register.
2944 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
2945 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2946 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2947 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2948 } else {
2949 // When a physical register is available read the value from it and glue
2950 // the reads together.
2951 ArgValueLo =
2952 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2953 *InFlag = ArgValueLo.getValue(2);
2954 ArgValueHi =
2955 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2956 *InFlag = ArgValueHi.getValue(2);
2957 }
2958
2959 // Convert the i32 type into v32i1 type.
2960 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2961
2962 // Convert the i32 type into v32i1 type.
2963 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2964
2965 // Concatenate the two values together.
2966 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2967}
2968
2969/// The function will lower a register of various sizes (8/16/32/64)
2970/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2971/// \returns a DAG node contains the operand after lowering to mask type.
2972static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2973 const EVT &ValLoc, const SDLoc &Dl,
2974 SelectionDAG &DAG) {
2975 SDValue ValReturned = ValArg;
2976
2977 if (ValVT == MVT::v1i1)
2978 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2979
2980 if (ValVT == MVT::v64i1) {
2981 // In 32 bit machine, this case is handled by getv64i1Argument
2982 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2982, __PRETTY_FUNCTION__))
;
2983 // In 64 bit machine, There is no need to truncate the value only bitcast
2984 } else {
2985 MVT maskLen;
2986 switch (ValVT.getSimpleVT().SimpleTy) {
2987 case MVT::v8i1:
2988 maskLen = MVT::i8;
2989 break;
2990 case MVT::v16i1:
2991 maskLen = MVT::i16;
2992 break;
2993 case MVT::v32i1:
2994 maskLen = MVT::i32;
2995 break;
2996 default:
2997 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2997)
;
2998 }
2999
3000 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3001 }
3002 return DAG.getBitcast(ValVT, ValReturned);
3003}
3004
3005/// Lower the result values of a call into the
3006/// appropriate copies out of appropriate physical registers.
3007///
/// Return locations are assigned by running RetCC_X86 over \p Ins. For each
/// location the value is copied out of its register (threading \p Chain and
/// \p InFlag through the copies), converted back to its value type when the
/// location type differs, and appended to \p InVals.
///
/// \param Chain    Incoming chain; the updated chain is returned.
/// \param InFlag   Incoming glue used for the register copies.
/// \param RegMask  Optional register mask; when non-null, every register
///                 used for a return value (and its sub-registers) has its
///                 bit cleared.
/// \returns the chain after the last register copy.
3008SDValue X86TargetLowering::LowerCallResult(
3009 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3010 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3011 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3012 uint32_t *RegMask) const {
3013
3014 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3015 // Assign locations to each value returned by this call.
3016 SmallVector<CCValAssign, 16> RVLocs;
3017 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3018 *DAG.getContext());
3019 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3020
3021 // Copy all of the result registers out of their specified physreg.
// NOTE(review): InsIndex is incremented alongside I but is never read inside
// this loop.
3022 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3023 ++I, ++InsIndex) {
3024 CCValAssign &VA = RVLocs[I];
3025 EVT CopyVT = VA.getLocVT();
3026
3027 // In some calling conventions we need to remove the used registers
3028 // from the register mask.
3029 if (RegMask) {
3030 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3031 SubRegs.isValid(); ++SubRegs)
// One bit per register: word index is Reg / 32, bit index is Reg % 32.
3032 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3033 }
3034
3035 // Report an error if there was an attempt to return FP values via XMM
3036 // registers.
3037 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3038 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3039 if (VA.getLocReg() == X86::XMM1)
3040 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3041 else
3042 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3043 } else if (!Subtarget.hasSSE2() &&
3044 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3045 CopyVT == MVT::f64) {
3046 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3047 if (VA.getLocReg() == X86::XMM1)
3048 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3049 else
3050 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3051 }
3052
3053 // If we prefer to use the value in xmm registers, copy it out as f80 and
3054 // use a truncate to move it from fp stack reg to xmm reg.
3055 bool RoundAfterCopy = false;
3056 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3057 isScalarFPTypeInSSEReg(VA.getValVT())) {
3058 if (!Subtarget.hasX87())
3059 report_fatal_error("X87 register return with X87 disabled");
3060 CopyVT = MVT::f80;
3061 RoundAfterCopy = (CopyVT != VA.getLocVT());
3062 }
3063
3064 SDValue Val;
3065 if (VA.needsCustom()) {
3066 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3067, __PRETTY_FUNCTION__))
3067 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3067, __PRETTY_FUNCTION__))
;
// The custom case consumes two consecutive RVLocs entries, hence the ++I.
3068 Val =
3069 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3070 } else {
// getCopyFromReg yields (value, chain, glue) as results 0, 1 and 2.
3071 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3072 .getValue(1);
3073 Val = Chain.getValue(0);
3074 InFlag = Chain.getValue(2);
3075 }
3076
3077 if (RoundAfterCopy)
3078 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3079 // This truncation won't change the value.
3080 DAG.getIntPtrConstant(1, dl));
3081
3082 if (VA.isExtInLoc()) {
3083 if (VA.getValVT().isVector() &&
3084 VA.getValVT().getScalarType() == MVT::i1 &&
3085 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3086 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3087 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3088 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3089 } else
3090 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3091 }
3092
3093 if (VA.getLocInfo() == CCValAssign::BCvt)
3094 Val = DAG.getBitcast(VA.getValVT(), Val);
3095
3096 InVals.push_back(Val);
3097 }
3098
3099 return Chain;
3100}
3101
3102//===----------------------------------------------------------------------===//
3103// C & StdCall & Fast Calling Convention implementation
3104//===----------------------------------------------------------------------===//
3105// The StdCall calling convention is the standard for many Windows API
3106// routines. It differs from the C calling convention only slightly: the
3107// callee cleans up the stack, not the caller. Symbols are also decorated
3108// in a special way, and the convention does not support vector arguments.
3109// For info on fast calling convention see Fast Calling Convention (tail call)
3110// implementation LowerX86_32FastCCCallTo.
3111
3112/// StructReturnType - Classifies whether, and how, a call or a function uses
3113/// struct return (sret) semantics; see callIsStructReturn/argsAreStructReturn.
3114enum StructReturnType {
3115 NotStructReturn, // No arguments, or the first argument is not sret.
3116 RegStructReturn, // First argument is sret and is inreg, or target is MCU.
3117 StackStructReturn // First argument is sret in every other case.
3118};
3119static StructReturnType
3120callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
3121 if (Outs.empty())
3122 return NotStructReturn;
3123
3124 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
3125 if (!Flags.isSRet())
3126 return NotStructReturn;
3127 if (Flags.isInReg() || IsMCU)
3128 return RegStructReturn;
3129 return StackStructReturn;
3130}
3131
3132/// Determines whether a function uses struct return semantics.
3133static StructReturnType
3134argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
3135 if (Ins.empty())
3136 return NotStructReturn;
3137
3138 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
3139 if (!Flags.isSRet())
3140 return NotStructReturn;
3141 if (Flags.isInReg() || IsMCU)
3142 return RegStructReturn;
3143 return StackStructReturn;
3144}
3145
3146/// Make a copy of an aggregate at address specified by "Src" to address
3147/// "Dst" with size and alignment information specified by the specific
3148/// parameter attribute. The copy will be passed as a byval function parameter.
3149static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3150 SDValue Chain, ISD::ArgFlagsTy Flags,
3151 SelectionDAG &DAG, const SDLoc &dl) {
3152 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3153
3154 return DAG.getMemcpy(
3155 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3156 /*isVolatile*/ false, /*AlwaysInline=*/true,
3157 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3158}
3159
3160/// Return true if the calling convention is one that we can guarantee TCO for.
3161static bool canGuaranteeTCO(CallingConv::ID CC) {
3162 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3163 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3164 CC == CallingConv::HHVM || CC == CallingConv::Tail);
3165}
3166
3167/// Return true if we might ever do TCO for calls with this calling convention.
3168static bool mayTailCallThisCC(CallingConv::ID CC) {
3169 switch (CC) {
3170 // C calling conventions:
3171 case CallingConv::C:
3172 case CallingConv::Win64:
3173 case CallingConv::X86_64_SysV:
3174 // Callee pop conventions:
3175 case CallingConv::X86_ThisCall:
3176 case CallingConv::X86_StdCall:
3177 case CallingConv::X86_VectorCall:
3178 case CallingConv::X86_FastCall:
3179 // Swift:
3180 case CallingConv::Swift:
3181 return true;
3182 default:
3183 return canGuaranteeTCO(CC);
3184 }
3185}
3186
3187/// Return true if the function is being made into a tailcall target by
3188/// changing its ABI.
3189static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3190 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
3191}
3192
3193bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3194 if (!CI->isTailCall())
3195 return false;
3196
3197 CallingConv::ID CalleeCC = CI->getCallingConv();
3198 if (!mayTailCallThisCC(CalleeCC))
3199 return false;
3200
3201 return true;
3202}
3203
/// Lowers the incoming argument Ins[i], whose location \p VA is in memory.
///
/// Depending on the argument this returns:
///  - for byval arguments, the frame index of a fixed stack object covering
///    the byval size (at least one byte);
///  - for copy-elision candidates, a load from a fixed stack object that
///    covers the whole argument type (or from an existing object that
///    already contains this part);
///  - otherwise, a load from a newly created fixed stack object, converted
///    back to VA.getValVT() when the value was extended in memory.
///
/// \param MFI Frame info in which the fixed stack objects are created.
/// \param i   Index into \p Ins of the argument being lowered.
3204SDValue
3205X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3206 const SmallVectorImpl<ISD::InputArg> &Ins,
3207 const SDLoc &dl, SelectionDAG &DAG,
3208 const CCValAssign &VA,
3209 MachineFrameInfo &MFI, unsigned i) const {
3210 // Create the nodes corresponding to a load from this parameter slot.
3211 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3212 bool AlwaysUseMutable = shouldGuaranteeTCO(
3213 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3214 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3215 EVT ValVT;
3216 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3217
3218 // If value is passed by pointer we have address passed instead of the value
3219 // itself. No need to extend if the mask value and location share the same
3220 // absolute size.
3221 bool ExtendedInMem =
3222 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3223 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3224
// ValVT is the type that is loaded from the stack slot below.
3225 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3226 ValVT = VA.getLocVT();
3227 else
3228 ValVT = VA.getValVT();
3229
3230 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3231 // changed with more analysis.
3232 // In case of tail call optimization mark all arguments mutable. Since they
3233 // could be overwritten by lowering of arguments in case of a tail call.
3234 if (Flags.isByVal()) {
3235 unsigned Bytes = Flags.getByValSize();
3236 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3237
3238 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3239 // can be improved with deeper analysis.
3240 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3241 /*isAliased=*/true);
3242 return DAG.getFrameIndex(FI, PtrVT);
3243 }
3244
3245 EVT ArgVT = Ins[i].ArgVT;
3246
3247 // If this is a vector that has been split into multiple parts, and the
3248 // scalar size of the parts don't match the vector element size, then we can't
3249 // elide the copy. The parts will have padding between them instead of being
3250 // packed like a vector.
3251 bool ScalarizedAndExtendedVector =
3252 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3253 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3254
3255 // This is an argument in memory. We might be able to perform copy elision.
3256 // If the argument is passed directly in memory without any extension, then we
3257 // can perform copy elision. Large vector types, for example, may be passed
3258 // indirectly by pointer.
3259 if (Flags.isCopyElisionCandidate() &&
3260 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3261 !ScalarizedAndExtendedVector) {
3262 SDValue PartAddr;
3263 if (Ins[i].PartOffset == 0) {
3264 // If this is a one-part value or the first part of a multi-part value,
3265 // create a stack object for the entire argument value type and return a
3266 // load from our portion of it. This assumes that if the first part of an
3267 // argument is in memory, the rest will also be in memory.
3268 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3269 /*IsImmutable=*/false);
3270 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3271 return DAG.getLoad(
3272 ValVT, dl, Chain, PartAddr,
3273 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3274 } else {
3275 // This is not the first piece of an argument in memory. See if there is
3276 // already a fixed stack object including this offset. If so, assume it
3277 // was created by the PartOffset == 0 branch above and create a load from
3278 // the appropriate offset into it.
3279 int64_t PartBegin = VA.getLocMemOffset();
3280 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3281 int FI = MFI.getObjectIndexBegin();
// Scan the fixed objects for one whose byte range contains this part.
3282 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3283 int64_t ObjBegin = MFI.getObjectOffset(FI);
3284 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3285 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3286 break;
3287 }
// FI is still a fixed object index only if the scan above found a match;
// otherwise fall through to the generic path below.
3288 if (MFI.isFixedObjectIndex(FI)) {
3289 SDValue Addr =
3290 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3291 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3292 return DAG.getLoad(
3293 ValVT, dl, Chain, Addr,
3294 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3295 Ins[i].PartOffset));
3296 }
3297 }
3298 }
3299
// Generic path: a fresh fixed object sized for ValVT at the argument offset.
3300 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3301 VA.getLocMemOffset(), isImmutable);
3302
3303 // Set SExt or ZExt flag.
3304 if (VA.getLocInfo() == CCValAssign::ZExt) {
3305 MFI.setObjectZExt(FI, true);
3306 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3307 MFI.setObjectSExt(FI, true);
3308 }
3309
3310 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3311 SDValue Val = DAG.getLoad(
3312 ValVT, dl, Chain, FIN,
3313 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
// When the value was extended in memory it was loaded with the location
// type, so convert it back to the value type here.
3314 return ExtendedInMem
3315 ? (VA.getValVT().isVector()
3316 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3317 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3318 : Val;
3319}
3320
3321// FIXME: Get this from tablegen.
3322static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3323 const X86Subtarget &Subtarget) {
3324 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3324, __PRETTY_FUNCTION__))
;
3325
3326 if (Subtarget.isCallingConvWin64(CallConv)) {
3327 static const MCPhysReg GPR64ArgRegsWin64[] = {
3328 X86::RCX, X86::RDX, X86::R8, X86::R9
3329 };
3330 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3331 }
3332
3333 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3334 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3335 };
3336 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3337}
3338
3339// FIXME: Get this from tablegen.
3340static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3341 CallingConv::ID CallConv,
3342 const X86Subtarget &Subtarget) {
3343 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3343, __PRETTY_FUNCTION__))
;
3344 if (Subtarget.isCallingConvWin64(CallConv)) {
3345 // The XMM registers which might contain var arg parameters are shadowed
3346 // in their paired GPR. So we only need to save the GPR to their home
3347 // slots.
3348 // TODO: __vectorcall will change this.
3349 return None;
3350 }
3351
3352 const Function &F = MF.getFunction();
3353 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3354 bool isSoftFloat = Subtarget.useSoftFloat();
3355 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3356, __PRETTY_FUNCTION__))
3356 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3356, __PRETTY_FUNCTION__))
;
3357 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3358 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3359 // registers.
3360 return None;
3361
3362 static const MCPhysReg XMMArgRegs64Bit[] = {
3363 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3364 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3365 };
3366 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3367}
3368
3369#ifndef NDEBUG
3370static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3371 return llvm::is_sorted(
3372 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3373 return A.getValNo() < B.getValNo();
3374 });
3375}
3376#endif
3377
3378namespace {
3379/// This is a helper class for lowering variable arguments parameters.
/// The constructor captures the lowering context (function info, DAG,
/// subtarget, calling convention, CC state) and caches the machine function,
/// IR function, frame info, frame lowering and target lowering derived from
/// it. Most of these are held by reference, including the SDLoc, so the
/// objects passed in must outlive the helper.
3380class VarArgsLoweringHelper {
3381public:
3382 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3383 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3384 CallingConv::ID CallConv, CCState &CCInfo)
3385 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3386 TheMachineFunction(DAG.getMachineFunction()),
3387 TheFunction(TheMachineFunction.getFunction()),
3388 FrameInfo(TheMachineFunction.getFrameInfo()),
3389 FrameLowering(*Subtarget.getFrameLowering()),
3390 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3391 CCInfo(CCInfo) {}
3392
3393 // Lower variable arguments parameters.
3394 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3395
3396private:
3397 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3398
3399 void forwardMustTailParameters(SDValue &Chain);
3400
// Convenience predicates forwarding to the subtarget.
3401 bool is64Bit() const { return Subtarget.is64Bit(); }
3402 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3403
// NOTE: members are initialized in declaration order. TheMachineFunction
// must stay declared before TheFunction and FrameInfo, which are initialized
// from it in the constructor.
3404 X86MachineFunctionInfo *FuncInfo;
3405 const SDLoc &DL;
3406 SelectionDAG &DAG;
3407 const X86Subtarget &Subtarget;
3408 MachineFunction &TheMachineFunction;
3409 const Function &TheFunction;
3410 MachineFrameInfo &FrameInfo;
3411 const TargetFrameLowering &FrameLowering;
3412 const TargetLowering &TargLowering;
3413 CallingConv::ID CallConv;
3414 CCState &CCInfo;
3415};
3416} // namespace
3417
3418void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3419 SDValue &Chain, unsigned StackSize) {
3420 // If the function takes variable number of arguments, make a frame index for
3421 // the start of the first vararg value... for expansion of llvm.va_start. We
3422 // can skip this if there are no va_start calls.
3423 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3424 CallConv != CallingConv::X86_ThisCall)) {
3425 FuncInfo->setVarArgsFrameIndex(
3426 FrameInfo.CreateFixedObject(1, StackSize, true));
3427 }
3428
3429 // Figure out if XMM registers are in use.
3430 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3432, __PRETTY_FUNCTION__))
3431 TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3432, __PRETTY_FUNCTION__))
3432 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3432, __PRETTY_FUNCTION__))
;
3433
3434 // 64-bit calling conventions support varargs and register parameters, so we
3435 // have to do extra work to spill them in the prologue.
3436 if (is64Bit()) {
3437 // Find the first unallocated argument registers.
3438 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3439 ArrayRef<MCPhysReg> ArgXMMs =
3440 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3441 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3442 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3443
3444 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3445, __PRETTY_FUNCTION__))
3445 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3445, __PRETTY_FUNCTION__))
;
3446
3447 if (isWin64()) {
3448 // Get to the caller-allocated home save location. Add 8 to account
3449 // for the return address.
3450 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3451 FuncInfo->setRegSaveFrameIndex(
3452 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3453 // Fixup to set vararg frame on shadow area (4 x i64).
3454 if (NumIntRegs < 4)
3455 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3456 } else {
3457 // For X86-64, if there are vararg parameters that are passed via
3458 // registers, then we must store them to their spots on the stack so
3459 // they may be loaded by dereferencing the result of va_next.
3460 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3461 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3462 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3463 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3464 }
3465
3466 SmallVector<SDValue, 6>
3467 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3468 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3469 // keeping live input value
3470 SDValue ALVal; // if applicable keeps SDValue for %al register
3471
3472 // Gather all the live in physical registers.
3473 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3474 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3475 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3476 }
3477 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3478 if (!AvailableXmms.empty()) {
3479 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3480 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3481 for (MCPhysReg Reg : AvailableXmms) {
3482 Register XMMReg = TheMachineFunction.addLiveIn(Reg, &X86::VR128RegClass);
3483 LiveXMMRegs.push_back(
3484 DAG.getCopyFromReg(Chain, DL, XMMReg, MVT::v4f32));
3485 }
3486 }
3487
3488 // Store the integer parameter registers.
3489 SmallVector<SDValue, 8> MemOps;
3490 SDValue RSFIN =
3491 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3492 TargLowering.getPointerTy(DAG.getDataLayout()));
3493 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3494 for (SDValue Val : LiveGPRs) {
3495 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3496 TargLowering.getPointerTy(DAG.getDataLayout()),
3497 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3498 SDValue Store =
3499 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3500 MachinePointerInfo::getFixedStack(
3501 DAG.getMachineFunction(),
3502 FuncInfo->getRegSaveFrameIndex(), Offset));
3503 MemOps.push_back(Store);
3504 Offset += 8;
3505 }
3506
3507 // Now store the XMM (fp + vector) parameter registers.
3508 if (!LiveXMMRegs.empty()) {
3509 SmallVector<SDValue, 12> SaveXMMOps;
3510 SaveXMMOps.push_back(Chain);
3511 SaveXMMOps.push_back(ALVal);
3512 SaveXMMOps.push_back(
3513 DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
3514 SaveXMMOps.push_back(
3515 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
3516 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3517 LiveXMMRegs.end());
3518 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
3519 MVT::Other, SaveXMMOps));
3520 }
3521
3522 if (!MemOps.empty())
3523 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3524 }
3525}
3526
3527void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3528 // Find the largest legal vector type.
3529 MVT VecVT = MVT::Other;
3530 // FIXME: Only some x86_32 calling conventions support AVX512.
3531 if (Subtarget.useAVX512Regs() &&
3532 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3533 CallConv == CallingConv::Intel_OCL_BI)))
3534 VecVT = MVT::v16f32;
3535 else if (Subtarget.hasAVX())
3536 VecVT = MVT::v8f32;
3537 else if (Subtarget.hasSSE2())
3538 VecVT = MVT::v4f32;
3539
3540 // We forward some GPRs and some vector types.
3541 SmallVector<MVT, 2> RegParmTypes;
3542 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3543 RegParmTypes.push_back(IntVT);
3544 if (VecVT != MVT::Other)
3545 RegParmTypes.push_back(VecVT);
3546
3547 // Compute the set of forwarded registers. The rest are scratch.
3548 SmallVectorImpl<ForwardedRegister> &Forwards =
3549 FuncInfo->getForwardedMustTailRegParms();
3550 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3551
3552 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3553 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3554 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3555 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3556 }
3557
3558 // Copy all forwards from physical to virtual registers.
3559 for (ForwardedRegister &FR : Forwards) {
3560 // FIXME: Can we use a less constrained schedule?
3561 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3562 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
3563 TargLowering.getRegClassFor(FR.VT));
3564 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
3565 }
3566}
3567
3568void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
3569 unsigned StackSize) {
3570 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
3571 // If necessary, it would be set into the correct value later.
3572 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3573 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3574
3575 if (FrameInfo.hasVAStart())
3576 createVarArgAreaAndStoreRegisters(Chain, StackSize);
3577
3578 if (FrameInfo.hasMustTailInVarArgFunc())
3579 forwardMustTailParameters(Chain);
3580}
3581
3582SDValue X86TargetLowering::LowerFormalArguments(
3583 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3584 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3585 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3586 MachineFunction &MF = DAG.getMachineFunction();
3587 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3588
3589 const Function &F = MF.getFunction();
3590 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3591 F.getName() == "main")
3592 FuncInfo->setForceFramePointer(true);
3593
3594 MachineFrameInfo &MFI = MF.getFrameInfo();
3595 bool Is64Bit = Subtarget.is64Bit();
3596 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3597
3598 assert(((!(IsVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3600, __PRETTY_FUNCTION__))
3599 !(IsVarArg && canGuaranteeTCO(CallConv)) &&((!(IsVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3600, __PRETTY_FUNCTION__))
3600 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(IsVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(IsVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3600, __PRETTY_FUNCTION__))
;
3601
3602 // Assign locations to all of the incoming arguments.
3603 SmallVector<CCValAssign, 16> ArgLocs;
3604 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3605
3606 // Allocate shadow area for Win64.
3607 if (IsWin64)
3608 CCInfo.AllocateStack(32, Align(8));
3609
3610 CCInfo.AnalyzeArguments(Ins, CC_X86);
3611
3612 // In vectorcall calling convention a second pass is required for the HVA
3613 // types.
3614 if (CallingConv::X86_VectorCall == CallConv) {
3615 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3616 }
3617
3618 // The next loop assumes that the locations are in the same order of the
3619 // input arguments.
3620 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3621, __PRETTY_FUNCTION__))
3621 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3621, __PRETTY_FUNCTION__))
;
3622
3623 SDValue ArgValue;
3624 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3625 ++I, ++InsIndex) {
3626 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3626, __PRETTY_FUNCTION__))
;
3627 CCValAssign &VA = ArgLocs[I];
3628
3629 if (VA.isRegLoc()) {
3630 EVT RegVT = VA.getLocVT();
3631 if (VA.needsCustom()) {
3632 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3634, __PRETTY_FUNCTION__))
3633 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3634, __PRETTY_FUNCTION__))
3634 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3634, __PRETTY_FUNCTION__))
;
3635
3636 // v64i1 values, in regcall calling convention, that are
3637 // compiled to 32 bit arch, are split up into two registers.
3638 ArgValue =
3639 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3640 } else {
3641 const TargetRegisterClass *RC;
3642 if (RegVT == MVT::i8)
3643 RC = &X86::GR8RegClass;
3644 else if (RegVT == MVT::i16)
3645 RC = &X86::GR16RegClass;
3646 else if (RegVT == MVT::i32)
3647 RC = &X86::GR32RegClass;
3648 else if (Is64Bit && RegVT == MVT::i64)
3649 RC = &X86::GR64RegClass;
3650 else if (RegVT == MVT::f32)
3651 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3652 else if (RegVT == MVT::f64)
3653 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3654 else if (RegVT == MVT::f80)
3655 RC = &X86::RFP80RegClass;
3656 else if (RegVT == MVT::f128)
3657 RC = &X86::VR128RegClass;
3658 else if (RegVT.is512BitVector())
3659 RC = &X86::VR512RegClass;
3660 else if (RegVT.is256BitVector())
3661 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3662 else if (RegVT.is128BitVector())
3663 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3664 else if (RegVT == MVT::x86mmx)
3665 RC = &X86::VR64RegClass;
3666 else if (RegVT == MVT::v1i1)
3667 RC = &X86::VK1RegClass;
3668 else if (RegVT == MVT::v8i1)
3669 RC = &X86::VK8RegClass;
3670 else if (RegVT == MVT::v16i1)
3671 RC = &X86::VK16RegClass;
3672 else if (RegVT == MVT::v32i1)
3673 RC = &X86::VK32RegClass;
3674 else if (RegVT == MVT::v64i1)
3675 RC = &X86::VK64RegClass;
3676 else
3677 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3677)
;
3678
3679 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3680 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3681 }
3682
3683 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3684 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3685 // right size.
3686 if (VA.getLocInfo() == CCValAssign::SExt)
3687 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3688 DAG.getValueType(VA.getValVT()));
3689 else if (VA.getLocInfo() == CCValAssign::ZExt)
3690 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3691 DAG.getValueType(VA.getValVT()));
3692 else if (VA.getLocInfo() == CCValAssign::BCvt)
3693 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3694
3695 if (VA.isExtInLoc()) {
3696 // Handle MMX values passed in XMM regs.
3697 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3698 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3699 else if (VA.getValVT().isVector() &&
3700 VA.getValVT().getScalarType() == MVT::i1 &&
3701 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3702 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3703 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3704 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3705 } else
3706 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3707 }
3708 } else {
3709 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3709, __PRETTY_FUNCTION__))
;
3710 ArgValue =
3711 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3712 }
3713
3714 // If value is passed via pointer - do a load.
3715 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3716 ArgValue =
3717 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3718
3719 InVals.push_back(ArgValue);
3720 }
3721
3722 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3723 // Swift calling convention does not require we copy the sret argument
3724 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3725 if (CallConv == CallingConv::Swift)
3726 continue;
3727
3728 // All x86 ABIs require that for returning structs by value we copy the
3729 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3730 // the argument into a virtual register so that we can access it from the
3731 // return points.
3732 if (Ins[I].Flags.isSRet()) {
3733 Register Reg = FuncInfo->getSRetReturnReg();
3734 if (!Reg) {
3735 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3736 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3737 FuncInfo->setSRetReturnReg(Reg);
3738 }
3739 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3740 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3741 break;
3742 }
3743 }
3744
3745 unsigned StackSize = CCInfo.getNextStackOffset();
3746 // Align stack specially for tail calls.
3747 if (shouldGuaranteeTCO(CallConv,
3748 MF.getTarget().Options.GuaranteedTailCallOpt))
3749 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3750
3751 if (IsVarArg)
3752 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
3753 .lowerVarArgsParameters(Chain, StackSize);
3754
3755 // Some CCs need callee pop.
3756 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
3757 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3758 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3759 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3760 // X86 interrupts must pop the error code (and the alignment padding) if
3761 // present.
3762 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3763 } else {
3764 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3765 // If this is an sret function, the return should pop the hidden pointer.
3766 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3767 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3768 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3769 FuncInfo->setBytesToPopOnReturn(4);
3770 }
3771
3772 if (!Is64Bit) {
3773 // RegSaveFrameIndex is X86-64 only.
3774 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3775 }
3776
3777 FuncInfo->setArgumentStackSize(StackSize);
3778
3779 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3780 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3781 if (Personality == EHPersonality::CoreCLR) {
3782 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3782, __PRETTY_FUNCTION__))
;
3783 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3784 // that we'd prefer this slot be allocated towards the bottom of the frame
3785 // (i.e. near the stack pointer after allocating the frame). Every
3786 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3787 // offset from the bottom of this and each funclet's frame must be the
3788 // same, so the size of funclets' (mostly empty) frames is dictated by
3789 // how far this slot is from the bottom (since they allocate just enough
3790 // space to accommodate holding this slot at the correct offset).
3791 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
3792 EHInfo->PSPSymFrameIdx = PSPSymFI;
3793 }
3794 }
3795
3796 if (CallConv == CallingConv::X86_RegCall ||
3797 F.hasFnAttribute("no_caller_saved_registers")) {
3798 MachineRegisterInfo &MRI = MF.getRegInfo();
3799 for (std::pair<Register, Register> Pair : MRI.liveins())
3800 MRI.disableCalleeSavedRegister(Pair.first);
3801 }
3802
3803 return Chain;
3804}
3805
3806SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3807 SDValue Arg, const SDLoc &dl,
3808 SelectionDAG &DAG,
3809 const CCValAssign &VA,
3810 ISD::ArgFlagsTy Flags,
3811 bool isByVal) const {
3812 unsigned LocMemOffset = VA.getLocMemOffset();
3813 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3814 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3815 StackPtr, PtrOff);
3816 if (isByVal)
3817 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3818
3819 return DAG.getStore(
3820 Chain, dl, Arg, PtrOff,
3821 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3822}
3823
3824/// Emit a load of return address if tail call
3825/// optimization is performed and it is required.
3826SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3827 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3828 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3829 // Adjust the Return address stack slot.
3830 EVT VT = getPointerTy(DAG.getDataLayout());
3831 OutRetAddr = getReturnAddressFrameIndex(DAG);
3832
3833 // Load the "old" Return address.
3834 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3835 return SDValue(OutRetAddr.getNode(), 1);
3836}
3837
3838/// Emit a store of the return address if tail call
3839/// optimization is performed and it is required (FPDiff!=0).
3840static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3841 SDValue Chain, SDValue RetAddrFrIdx,
3842 EVT PtrVT, unsigned SlotSize,
3843 int FPDiff, const SDLoc &dl) {
3844 // Store the return address to the appropriate stack slot.
3845 if (!FPDiff) return Chain;
3846 // Calculate the new stack slot for the return address.
3847 int NewReturnAddrFI =
3848 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3849 false);
3850 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3851 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3852 MachinePointerInfo::getFixedStack(
3853 DAG.getMachineFunction(), NewReturnAddrFI));
3854 return Chain;
3855}
3856
3857/// Returns a vector_shuffle mask for an movs{s|d}, movd
3858/// operation of specified width.
3859static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3860 SDValue V2) {
3861 unsigned NumElems = VT.getVectorNumElements();
3862 SmallVector<int, 8> Mask;
3863 Mask.push_back(NumElems);
3864 for (unsigned i = 1; i != NumElems; ++i)
3865 Mask.push_back(i);
3866 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3867}
3868
3869SDValue
3870X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3871 SmallVectorImpl<SDValue> &InVals) const {
3872 SelectionDAG &DAG = CLI.DAG;
3873 SDLoc &dl = CLI.DL;
3874 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3875 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3876 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3877 SDValue Chain = CLI.Chain;
3878 SDValue Callee = CLI.Callee;
3879 CallingConv::ID CallConv = CLI.CallConv;
3880 bool &isTailCall = CLI.IsTailCall;
3881 bool isVarArg = CLI.IsVarArg;
3882
3883 MachineFunction &MF = DAG.getMachineFunction();
3884 bool Is64Bit = Subtarget.is64Bit();
3885 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3886 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3887 bool IsSibcall = false;
3888 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
3889 CallConv == CallingConv::Tail;
3890 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3891 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CB);
3892 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3893 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3894 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3895 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CB);
3896 bool HasNoCfCheck =
3897 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3898 bool IsIndirectCall = (CI && CI->isIndirectCall());
3899 const Module *M = MF.getMMI().getModule();
3900 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3901
3902 MachineFunction::CallSiteInfo CSInfo;
3903 if (CallConv == CallingConv::X86_INTR)
3904 report_fatal_error("X86 interrupts may not be called directly");
3905
3906 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
3907 // If we are using a GOT, disable tail calls to external symbols with
3908 // default visibility. Tail calling such a symbol requires using a GOT
3909 // relocation, which forces early binding of the symbol. This breaks code
3910 // that require lazy function symbol resolution. Using musttail or
3911 // GuaranteedTailCallOpt will override this.
3912 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3913 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3914 G->getGlobal()->hasDefaultVisibility()))
3915 isTailCall = false;
3916 }
3917
3918 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
3919 if (IsMustTail) {
3920 // Force this to be a tail call. The verifier rules are enough to ensure
3921 // that we can lower this successfully without moving the return address
3922 // around.
3923 isTailCall = true;
3924 } else if (isTailCall) {
3925 // Check if it's really possible to do a tail call.
3926 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3927 isVarArg, SR != NotStructReturn,
3928 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3929 Outs, OutVals, Ins, DAG);
3930
3931 // Sibcalls are automatically detected tailcalls which do not require
3932 // ABI changes.
3933 if (!IsGuaranteeTCO && isTailCall)
3934 IsSibcall = true;
3935
3936 if (isTailCall)
3937 ++NumTailCalls;
3938 }
3939
3940 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3941, __PRETTY_FUNCTION__))
3941 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3941, __PRETTY_FUNCTION__))
;
3942
3943 // Analyze operands of the call, assigning locations to each operand.
3944 SmallVector<CCValAssign, 16> ArgLocs;
3945 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3946
3947 // Allocate shadow area for Win64.
3948 if (IsWin64)
3949 CCInfo.AllocateStack(32, Align(8));
3950
3951 CCInfo.AnalyzeArguments(Outs, CC_X86);
3952
3953 // In vectorcall calling convention a second pass is required for the HVA
3954 // types.
3955 if (CallingConv::X86_VectorCall == CallConv) {
3956 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3957 }
3958
3959 // Get a count of how many bytes are to be pushed on the stack.
3960 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3961 if (IsSibcall)
3962 // This is a sibcall. The memory operands are available in caller's
3963 // own caller's stack.
3964 NumBytes = 0;
3965 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
3966 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3967
3968 int FPDiff = 0;
3969 if (isTailCall && !IsSibcall && !IsMustTail) {
3970 // Lower arguments at fp - stackoffset + fpdiff.
3971 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3972
3973 FPDiff = NumBytesCallerPushed - NumBytes;
3974
3975 // Set the delta of movement of the returnaddr stackslot.
3976 // But only set if delta is greater than previous delta.
3977 if (FPDiff < X86Info->getTCReturnAddrDelta())
3978 X86Info->setTCReturnAddrDelta(FPDiff);
3979 }
3980
3981 unsigned NumBytesToPush = NumBytes;
3982 unsigned NumBytesToPop = NumBytes;
3983
3984 // If we have an inalloca argument, all stack space has already been allocated
3985 // for us and be right at the top of the stack. We don't support multiple
3986 // arguments passed in memory when using inalloca.
3987 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3988 NumBytesToPush = 0;
3989 if (!ArgLocs.back().isMemLoc())
3990 report_fatal_error("cannot use inalloca attribute on a register "
3991 "parameter");
3992 if (ArgLocs.back().getLocMemOffset() != 0)
3993 report_fatal_error("any parameter with the inalloca attribute must be "
3994 "the only memory argument");
3995 } else if (CLI.IsPreallocated) {
3996 assert(ArgLocs.back().isMemLoc() &&((ArgLocs.back().isMemLoc() && "cannot use preallocated attribute on a register "
"parameter") ? static_cast<void> (0) : __assert_fail (
"ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3998, __PRETTY_FUNCTION__))
3997 "cannot use preallocated attribute on a register "((ArgLocs.back().isMemLoc() && "cannot use preallocated attribute on a register "
"parameter") ? static_cast<void> (0) : __assert_fail (
"ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3998, __PRETTY_FUNCTION__))
3998 "parameter")((ArgLocs.back().isMemLoc() && "cannot use preallocated attribute on a register "
"parameter") ? static_cast<void> (0) : __assert_fail (
"ArgLocs.back().isMemLoc() && \"cannot use preallocated attribute on a register \" \"parameter\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3998, __PRETTY_FUNCTION__))
;
3999 SmallVector<size_t, 4> PreallocatedOffsets;
4000 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4001 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4002 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4003 }
4004 }
4005 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4006 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4007 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4008 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4009 NumBytesToPush = 0;
4010 }
4011
4012 if (!IsSibcall && !IsMustTail)
4013 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4014 NumBytes - NumBytesToPush, dl);
4015
4016 SDValue RetAddrFrIdx;
4017 // Load return address for tail calls.
4018 if (isTailCall && FPDiff)
4019 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4020 Is64Bit, FPDiff, dl);
4021
4022 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4023 SmallVector<SDValue, 8> MemOpChains;
4024 SDValue StackPtr;
4025
4026 // The next loop assumes that the locations are in the same order of the
4027 // input arguments.
4028 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4029, __PRETTY_FUNCTION__))
4029 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4029, __PRETTY_FUNCTION__))
;
4030
4031 // Walk the register/memloc assignments, inserting copies/loads. In the case
4032 // of tail call optimization arguments are handle later.
4033 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4034 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4035 ++I, ++OutIndex) {
4036 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4036, __PRETTY_FUNCTION__))
;
4037 // Skip inalloca/preallocated arguments, they have already been written.
4038 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4039 if (Flags.isInAlloca() || Flags.isPreallocated())
4040 continue;
4041
4042 CCValAssign &VA = ArgLocs[I];
4043 EVT RegVT = VA.getLocVT();
4044 SDValue Arg = OutVals[OutIndex];
4045 bool isByVal = Flags.isByVal();
4046
4047 // Promote the value if needed.
4048 switch (VA.getLocInfo()) {
4049 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4049)
;
4050 case CCValAssign::Full: break;
4051 case CCValAssign::SExt:
4052 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4053 break;
4054 case CCValAssign::ZExt:
4055 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4056 break;
4057 case CCValAssign::AExt:
4058 if (Arg.getValueType().isVector() &&
4059 Arg.getValueType().getVectorElementType() == MVT::i1)
4060 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4061 else if (RegVT.is128BitVector()) {
4062 // Special case: passing MMX values in XMM registers.
4063 Arg = DAG.getBitcast(MVT::i64, Arg);
4064 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4065 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4066 } else
4067 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4068 break;
4069 case CCValAssign::BCvt:
4070 Arg = DAG.getBitcast(RegVT, Arg);
4071 break;
4072 case CCValAssign::Indirect: {
4073 if (isByVal) {
4074 // Memcpy the argument to a temporary stack slot to prevent
4075 // the caller from seeing any modifications the callee may make
4076 // as guaranteed by the `byval` attribute.
4077 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4078 Flags.getByValSize(),
4079 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4080 SDValue StackSlot =
4081 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4082 Chain =
4083 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4084 // From now on treat this as a regular pointer
4085 Arg = StackSlot;
4086 isByVal = false;
4087 } else {
4088 // Store the argument.
4089 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4090 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4091 Chain = DAG.getStore(
4092 Chain, dl, Arg, SpillSlot,
4093 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4094 Arg = SpillSlot;
4095 }
4096 break;
4097 }
4098 }
4099
4100 if (VA.needsCustom()) {
4101 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4102, __PRETTY_FUNCTION__))
4102 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4102, __PRETTY_FUNCTION__))
;
4103 // Split v64i1 value into two registers
4104 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4105 } else if (VA.isRegLoc()) {
4106 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4107 const TargetOptions &Options = DAG.getTarget().Options;
4108 if (Options.EmitCallSiteInfo)
4109 CSInfo.emplace_back(VA.getLocReg(), I);
4110 if (isVarArg && IsWin64) {
4111 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4112 // shadow reg if callee is a varargs function.
4113 Register ShadowReg;
4114 switch (VA.getLocReg()) {
4115 case X86::XMM0: ShadowReg = X86::RCX; break;
4116 case X86::XMM1: ShadowReg = X86::RDX; break;
4117 case X86::XMM2: ShadowReg = X86::R8; break;
4118 case X86::XMM3: ShadowReg = X86::R9; break;
4119 }
4120 if (ShadowReg)
4121 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4122 }
4123 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4124 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4124, __PRETTY_FUNCTION__))
;
4125 if (!StackPtr.getNode())
4126 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4127 getPointerTy(DAG.getDataLayout()));
4128 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4129 dl, DAG, VA, Flags, isByVal));
4130 }
4131 }
4132
4133 if (!MemOpChains.empty())
4134 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4135
4136 if (Subtarget.isPICStyleGOT()) {
4137 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4138 // GOT pointer (except regcall).
4139 if (!isTailCall) {
4140 // Indirect call with RegCall calling convertion may use up all the
4141 // general registers, so it is not suitable to bind EBX reister for
4142 // GOT address, just let register allocator handle it.
4143 if (CallConv != CallingConv::X86_RegCall)
4144 RegsToPass.push_back(std::make_pair(
4145 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4146 getPointerTy(DAG.getDataLayout()))));
4147 } else {
4148 // If we are tail calling and generating PIC/GOT style code load the
4149 // address of the callee into ECX. The value in ecx is used as target of
4150 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4151 // for tail calls on PIC/GOT architectures. Normally we would just put the
4152 // address of GOT into ebx and then call target@PLT. But for tail calls
4153 // ebx would be restored (since ebx is callee saved) before jumping to the
4154 // target@PLT.
4155
4156 // Note: The actual moving to ECX is done further down.
4157 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4158 if (G && !G->getGlobal()->hasLocalLinkage() &&
4159 G->getGlobal()->hasDefaultVisibility())
4160 Callee = LowerGlobalAddress(Callee, DAG);
4161 else if (isa<ExternalSymbolSDNode>(Callee))
4162 Callee = LowerExternalSymbol(Callee, DAG);
4163 }
4164 }
4165
4166 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
4167 // From AMD64 ABI document:
4168 // For calls that may call functions that use varargs or stdargs
4169 // (prototype-less calls or calls to functions containing ellipsis (...) in
4170 // the declaration) %al is used as hidden argument to specify the number
4171 // of SSE registers used. The contents of %al do not need to match exactly
4172 // the number of registers, but must be an ubound on the number of SSE
4173 // registers used and is in the range 0 - 8 inclusive.
4174
4175 // Count the number of XMM registers allocated.
4176 static const MCPhysReg XMMArgRegs[] = {
4177 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4178 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4179 };
4180 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4181 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4182, __PRETTY_FUNCTION__))
4182 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4182, __PRETTY_FUNCTION__))
;
4183 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4184 DAG.getConstant(NumXMMRegs, dl,
4185 MVT::i8)));
4186 }
4187
4188 if (isVarArg && IsMustTail) {
4189 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4190 for (const auto &F : Forwards) {
4191 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4192 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4193 }
4194 }
4195
4196 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4197 // don't need this because the eligibility check rejects calls that require
4198 // shuffling arguments passed in memory.
4199 if (!IsSibcall && isTailCall) {
4200 // Force all the incoming stack arguments to be loaded from the stack
4201 // before any new outgoing arguments are stored to the stack, because the
4202 // outgoing stack slots may alias the incoming argument stack slots, and
4203 // the alias isn't otherwise explicit. This is slightly more conservative
4204 // than necessary, because it means that each store effectively depends
4205 // on every argument instead of just those arguments it would clobber.
4206 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4207
4208 SmallVector<SDValue, 8> MemOpChains2;
4209 SDValue FIN;
4210 int FI = 0;
4211 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4212 ++I, ++OutsIndex) {
4213 CCValAssign &VA = ArgLocs[I];
4214
4215 if (VA.isRegLoc()) {
4216 if (VA.needsCustom()) {
4217 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4218, __PRETTY_FUNCTION__))
4218 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4218, __PRETTY_FUNCTION__))
;
4219 // This means that we are in special case where one argument was
4220 // passed through two register locations - Skip the next location
4221 ++I;
4222 }
4223
4224 continue;
4225 }
4226
4227 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4227, __PRETTY_FUNCTION__))
;
4228 SDValue Arg = OutVals[OutsIndex];
4229 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4230 // Skip inalloca/preallocated arguments. They don't require any work.
4231 if (Flags.isInAlloca() || Flags.isPreallocated())
4232 continue;
4233 // Create frame index.
4234 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4235 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4236 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4237 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4238
4239 if (Flags.isByVal()) {
4240 // Copy relative to framepointer.
4241 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4242 if (!StackPtr.getNode())
4243 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4244 getPointerTy(DAG.getDataLayout()));
4245 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4246 StackPtr, Source);
4247
4248 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4249 ArgChain,
4250 Flags, DAG, dl));
4251 } else {
4252 // Store relative to framepointer.
4253 MemOpChains2.push_back(DAG.getStore(
4254 ArgChain, dl, Arg, FIN,
4255 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4256 }
4257 }
4258
4259 if (!MemOpChains2.empty())
4260 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4261
4262 // Store the return address to the appropriate stack slot.
4263 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4264 getPointerTy(DAG.getDataLayout()),
4265 RegInfo->getSlotSize(), FPDiff, dl);
4266 }
4267
4268 // Build a sequence of copy-to-reg nodes chained together with token chain
4269 // and flag operands which copy the outgoing args into registers.
4270 SDValue InFlag;
4271 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4272 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4273 RegsToPass[i].second, InFlag);
4274 InFlag = Chain.getValue(1);
4275 }
4276
4277 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4278 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4278, __PRETTY_FUNCTION__))
;
4279 // In the 64-bit large code model, we have to make all calls
4280 // through a register, since the call instruction's 32-bit
4281 // pc-relative offset may not be large enough to hold the whole
4282 // address.
4283 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4284 Callee->getOpcode() == ISD::ExternalSymbol) {
4285 // Lower direct calls to global addresses and external symbols. Setting
4286 // ForCall to true here has the effect of removing WrapperRIP when possible
4287 // to allow direct calls to be selected without first materializing the
4288 // address into a register.
4289 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4290 } else if (Subtarget.isTarget64BitILP32() &&
4291 Callee->getValueType(0) == MVT::i32) {
4292 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4293 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4294 }
4295
4296 // Returns a chain & a flag for retval copy to use.
4297 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4298 SmallVector<SDValue, 8> Ops;
4299
4300 if (!IsSibcall && isTailCall && !IsMustTail) {
4301 Chain = DAG.getCALLSEQ_END(Chain,
4302 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4303 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4304 InFlag = Chain.getValue(1);
4305 }
4306
4307 Ops.push_back(Chain);
4308 Ops.push_back(Callee);
4309
4310 if (isTailCall)
4311 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4312
4313 // Add argument registers to the end of the list so that they are known live
4314 // into the call.
4315 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4316 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4317 RegsToPass[i].second.getValueType()));
4318
4319 // Add a register mask operand representing the call-preserved registers.
4320 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
4321 // set X86_INTR calling convention because it has the same CSR mask
4322 // (same preserved registers).
4323 const uint32_t *Mask = RegInfo->getCallPreservedMask(
4324 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
4325 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4325, __PRETTY_FUNCTION__))
;
4326
4327 // If this is an invoke in a 32-bit function using a funclet-based
4328 // personality, assume the function clobbers all registers. If an exception
4329 // is thrown, the runtime will not restore CSRs.
4330 // FIXME: Model this more precisely so that we can register allocate across
4331 // the normal edge and spill and fill across the exceptional edge.
4332 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4333 const Function &CallerFn = MF.getFunction();
4334 EHPersonality Pers =
4335 CallerFn.hasPersonalityFn()
4336 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4337 : EHPersonality::Unknown;
4338 if (isFuncletEHPersonality(Pers))
4339 Mask = RegInfo->getNoPreservedMask();
4340 }
4341
4342 // Define a new register mask from the existing mask.
4343 uint32_t *RegMask = nullptr;
4344
4345 // In some calling conventions we need to remove the used physical registers
4346 // from the reg mask.
4347 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4348 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4349
4350 // Allocate a new Reg Mask and copy Mask.
4351 RegMask = MF.allocateRegMask();
4352 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4353 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4354
4355 // Make sure all sub registers of the argument registers are reset
4356 // in the RegMask.
4357 for (auto const &RegPair : RegsToPass)
4358 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4359 SubRegs.isValid(); ++SubRegs)
4360 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4361
4362 // Create the RegMask Operand according to our updated mask.
4363 Ops.push_back(DAG.getRegisterMask(RegMask));
4364 } else {
4365 // Create the RegMask Operand according to the static mask.
4366 Ops.push_back(DAG.getRegisterMask(Mask));
4367 }
4368
4369 if (InFlag.getNode())
4370 Ops.push_back(InFlag);
4371
4372 if (isTailCall) {
4373 // We used to do:
4374 //// If this is the first return lowered for this function, add the regs
4375 //// to the liveout set for the function.
4376 // This isn't right, although it's probably harmless on x86; liveouts
4377 // should be computed from returns not tail calls. Consider a void
4378 // function making a tail call to a function returning int.
4379 MF.getFrameInfo().setHasTailCall();
4380 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4381 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4382 return Ret;
4383 }
4384
4385 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4386 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4387 } else {
4388 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4389 }
4390 InFlag = Chain.getValue(1);
4391 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4392 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4393
4394 // Save heapallocsite metadata.
4395 if (CLI.CB)
4396 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4397 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4398
4399 // Create the CALLSEQ_END node.
4400 unsigned NumBytesForCalleeToPop;
4401 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4402 DAG.getTarget().Options.GuaranteedTailCallOpt))
4403 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4404 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4405 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4406 SR == StackStructReturn)
4407 // If this is a call to a struct-return function, the callee
4408 // pops the hidden struct pointer, so we have to push it back.
4409 // This is common for Darwin/X86, Linux & Mingw32 targets.
4410 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4411 NumBytesForCalleeToPop = 4;
4412 else
4413 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4414
4415 // Returns a flag for retval copy to use.
4416 if (!IsSibcall) {
4417 Chain = DAG.getCALLSEQ_END(Chain,
4418 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4419 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4420 true),
4421 InFlag, dl);
4422 InFlag = Chain.getValue(1);
4423 }
4424
4425 // Handle result values, copying them out of physregs into vregs that we
4426 // return.
4427 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4428 InVals, RegMask);
4429}
4430
4431//===----------------------------------------------------------------------===//
4432// Fast Calling Convention (tail call) implementation
4433//===----------------------------------------------------------------------===//
4434
4435// Like the stdcall convention, the callee cleans up the arguments, except that
4436// ECX is reserved for storing the tail called function address. Only 2
4437// registers are free for argument passing (inreg). Tail call optimization is
4438// performed provided:
4439// * tailcallopt is enabled
4440// * caller/callee are fastcc
4441// On X86_64 architecture with GOT-style position independent code only local
4442// (within module) calls are supported at the moment.
4443// To keep the stack aligned according to the platform ABI, the function
4444// GetAlignedArgumentStackSize ensures that the argument delta is always a
4445// multiple of stack alignment. (Dynamic linkers need this, e.g. Darwin's dyld)
4446// If a tail called function callee has more arguments than the caller the
4447// caller needs to make sure that there is room to move the RETADDR to. This is
4448// achieved by reserving an area the size of the argument delta right after the
4449// original RETADDR, but before the saved framepointer or the spilled registers
4450// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4451// stack layout:
4452// arg1
4453// arg2
4454// RETADDR
4455// [ new RETADDR
4456// move area ]
4457// (possible EBP)
4458// ESI
4459// EDI
4460// local1 ..
4461
4462/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4463/// requirement.
4464unsigned
4465X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4466 SelectionDAG &DAG) const {
4467 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4468 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4469 assert(StackSize % SlotSize == 0 &&((StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize"
) ? static_cast<void> (0) : __assert_fail ("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4470, __PRETTY_FUNCTION__))
4470 "StackSize must be a multiple of SlotSize")((StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize"
) ? static_cast<void> (0) : __assert_fail ("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4470, __PRETTY_FUNCTION__))
;
4471 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4472}
4473
4474/// Return true if the given stack call argument is already available in the
4475/// same position (relatively) of the caller's incoming argument stack.
4476static
4477bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4478 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4479 const X86InstrInfo *TII, const CCValAssign &VA) {
4480 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4481
4482 for (;;) {
4483 // Look through nodes that don't alter the bits of the incoming value.
4484 unsigned Op = Arg.getOpcode();
4485 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4486 Arg = Arg.getOperand(0);
4487 continue;
4488 }
4489 if (Op == ISD::TRUNCATE) {
4490 const SDValue &TruncInput = Arg.getOperand(0);
4491 if (TruncInput.getOpcode() == ISD::AssertZext &&
4492 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4493 Arg.getValueType()) {
4494 Arg = TruncInput.getOperand(0);
4495 continue;
4496 }
4497 }
4498 break;
4499 }
4500
4501 int FI = INT_MAX2147483647;
4502 if (Arg.getOpcode() == ISD::CopyFromReg) {
4503 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4504 if (!VR.isVirtual())
4505 return false;
4506 MachineInstr *Def = MRI->getVRegDef(VR);
4507 if (!Def)
4508 return false;
4509 if (!Flags.isByVal()) {
4510 if (!TII->isLoadFromStackSlot(*Def, FI))
4511 return false;
4512 } else {
4513 unsigned Opcode = Def->getOpcode();
4514 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4515 Opcode == X86::LEA64_32r) &&
4516 Def->getOperand(1).isFI()) {
4517 FI = Def->getOperand(1).getIndex();
4518 Bytes = Flags.getByValSize();
4519 } else
4520 return false;
4521 }
4522 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4523 if (Flags.isByVal())
4524 // ByVal argument is passed in as a pointer but it's now being
4525 // dereferenced. e.g.
4526 // define @foo(%struct.X* %A) {
4527 // tail call @bar(%struct.X* byval %A)
4528 // }
4529 return false;
4530 SDValue Ptr = Ld->getBasePtr();
4531 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4532 if (!FINode)
4533 return false;
4534 FI = FINode->getIndex();
4535 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4536 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4537 FI = FINode->getIndex();
4538 Bytes = Flags.getByValSize();
4539 } else
4540 return false;
4541
4542 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4542, __PRETTY_FUNCTION__))
;
4543 if (!MFI.isFixedObjectIndex(FI))
4544 return false;
4545
4546 if (Offset != MFI.getObjectOffset(FI))
4547 return false;
4548
4549 // If this is not byval, check that the argument stack object is immutable.
4550 // inalloca and argument copy elision can create mutable argument stack
4551 // objects. Byval objects can be mutated, but a byval call intends to pass the
4552 // mutated memory.
4553 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4554 return false;
4555
4556 if (VA.getLocVT().getFixedSizeInBits() >
4557 Arg.getValueSizeInBits().getFixedSize()) {
4558 // If the argument location is wider than the argument type, check that any
4559 // extension flags match.
4560 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4561 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4562 return false;
4563 }
4564 }
4565
4566 return Bytes == MFI.getObjectSize(FI);
4567}
4568
4569/// Check whether the call is eligible for tail call optimization. Targets
4570/// that want to do tail call optimization should implement this function.
4571bool X86TargetLowering::IsEligibleForTailCallOptimization(
4572 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4573 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4574 const SmallVectorImpl<ISD::OutputArg> &Outs,
4575 const SmallVectorImpl<SDValue> &OutVals,
4576 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4577 if (!mayTailCallThisCC(CalleeCC))
4578 return false;
4579
4580 // If -tailcallopt is specified, make fastcc functions tail-callable.
4581 MachineFunction &MF = DAG.getMachineFunction();
4582 const Function &CallerF = MF.getFunction();
4583
4584 // If the function return type is x86_fp80 and the callee return type is not,
4585 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4586 // perform a tailcall optimization here.
4587 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4588 return false;
4589
4590 CallingConv::ID CallerCC = CallerF.getCallingConv();
4591 bool CCMatch = CallerCC == CalleeCC;
4592 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4593 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4594 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4595 CalleeCC == CallingConv::Tail;
4596
4597 // Win64 functions have extra shadow space for argument homing. Don't do the
4598 // sibcall if the caller and callee have mismatched expectations for this
4599 // space.
4600 if (IsCalleeWin64 != IsCallerWin64)
4601 return false;
4602
4603 if (IsGuaranteeTCO) {
4604 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4605 return true;
4606 return false;
4607 }
4608
4609 // Look for obvious safe cases to perform tail call optimization that do not
4610 // require ABI changes. This is what gcc calls sibcall.
4611
4612 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4613 // emit a special epilogue.
4614 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4615 if (RegInfo->needsStackRealignment(MF))
4616 return false;
4617
4618 // Also avoid sibcall optimization if either caller or callee uses struct
4619 // return semantics.
4620 if (isCalleeStructRet || isCallerStructRet)
4621 return false;
4622
4623 // Do not sibcall optimize vararg calls unless all arguments are passed via
4624 // registers.
4625 LLVMContext &C = *DAG.getContext();
4626 if (isVarArg && !Outs.empty()) {
4627 // Optimizing for varargs on Win64 is unlikely to be safe without
4628 // additional testing.
4629 if (IsCalleeWin64 || IsCallerWin64)
4630 return false;
4631
4632 SmallVector<CCValAssign, 16> ArgLocs;
4633 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4634
4635 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4636 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4637 if (!ArgLocs[i].isRegLoc())
4638 return false;
4639 }
4640
4641 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4642 // stack. Therefore, if it's not used by the call it is not safe to optimize
4643 // this into a sibcall.
4644 bool Unused = false;
4645 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4646 if (!Ins[i].Used) {
4647 Unused = true;
4648 break;
4649 }
4650 }
4651 if (Unused) {
4652 SmallVector<CCValAssign, 16> RVLocs;
4653 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4654 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4655 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4656 CCValAssign &VA = RVLocs[i];
4657 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4658 return false;
4659 }
4660 }
4661
4662 // Check that the call results are passed in the same way.
4663 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4664 RetCC_X86, RetCC_X86))
4665 return false;
4666 // The callee has to preserve all registers the caller needs to preserve.
4667 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4668 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4669 if (!CCMatch) {
4670 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4671 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4672 return false;
4673 }
4674
4675 unsigned StackArgsSize = 0;
4676
4677 // If the callee takes no arguments then go on to check the results of the
4678 // call.
4679 if (!Outs.empty()) {
4680 // Check if stack adjustment is needed. For now, do not do this if any
4681 // argument is passed on the stack.
4682 SmallVector<CCValAssign, 16> ArgLocs;
4683 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4684
4685 // Allocate shadow area for Win64
4686 if (IsCalleeWin64)
4687 CCInfo.AllocateStack(32, Align(8));
4688
4689 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4690 StackArgsSize = CCInfo.getNextStackOffset();
4691
4692 if (CCInfo.getNextStackOffset()) {
4693 // Check if the arguments are already laid out in the right way as
4694 // the caller's fixed stack objects.
4695 MachineFrameInfo &MFI = MF.getFrameInfo();
4696 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4697 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4698 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4699 CCValAssign &VA = ArgLocs[i];
4700 SDValue Arg = OutVals[i];
4701 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4702 if (VA.getLocInfo() == CCValAssign::Indirect)
4703 return false;
4704 if (!VA.isRegLoc()) {
4705 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4706 MFI, MRI, TII, VA))
4707 return false;
4708 }
4709 }
4710 }
4711
4712 bool PositionIndependent = isPositionIndependent();
4713 // If the tailcall address may be in a register, then make sure it's
4714 // possible to register allocate for it. In 32-bit, the call address can
4715 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4716 // callee-saved registers are restored. These happen to be the same
4717 // registers used to pass 'inreg' arguments so watch out for those.
4718 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4719 !isa<ExternalSymbolSDNode>(Callee)) ||
4720 PositionIndependent)) {
4721 unsigned NumInRegs = 0;
4722 // In PIC we need an extra register to formulate the address computation
4723 // for the callee.
4724 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4725
4726 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4727 CCValAssign &VA = ArgLocs[i];
4728 if (!VA.isRegLoc())
4729 continue;
4730 Register Reg = VA.getLocReg();
4731 switch (Reg) {
4732 default: break;
4733 case X86::EAX: case X86::EDX: case X86::ECX:
4734 if (++NumInRegs == MaxInRegs)
4735 return false;
4736 break;
4737 }
4738 }
4739 }
4740
4741 const MachineRegisterInfo &MRI = MF.getRegInfo();
4742 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4743 return false;
4744 }
4745
4746 bool CalleeWillPop =
4747 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4748 MF.getTarget().Options.GuaranteedTailCallOpt);
4749
4750 if (unsigned BytesToPop =
4751 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4752 // If we have bytes to pop, the callee must pop them.
4753 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4754 if (!CalleePopMatches)
4755 return false;
4756 } else if (CalleeWillPop && StackArgsSize > 0) {
4757 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4758 return false;
4759 }
4760
4761 return true;
4762}
4763
4764FastISel *
4765X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4766 const TargetLibraryInfo *libInfo) const {
4767 return X86::createFastISel(funcInfo, libInfo);
4768}
4769
4770//===----------------------------------------------------------------------===//
4771// Other Lowering Hooks
4772//===----------------------------------------------------------------------===//
4773
4774static bool MayFoldLoad(SDValue Op) {
4775 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4776}
4777
4778static bool MayFoldIntoStore(SDValue Op) {
4779 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4780}
4781
4782static bool MayFoldIntoZeroExtend(SDValue Op) {
4783 if (Op.hasOneUse()) {
4784 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4785 return (ISD::ZERO_EXTEND == Opcode);
4786 }
4787 return false;
4788}
4789
4790static bool isTargetShuffle(unsigned Opcode) {
4791 switch(Opcode) {
4792 default: return false;
4793 case X86ISD::BLENDI:
4794 case X86ISD::PSHUFB:
4795 case X86ISD::PSHUFD:
4796 case X86ISD::PSHUFHW:
4797 case X86ISD::PSHUFLW:
4798 case X86ISD::SHUFP:
4799 case X86ISD::INSERTPS:
4800 case X86ISD::EXTRQI:
4801 case X86ISD::INSERTQI:
4802 case X86ISD::VALIGN:
4803 case X86ISD::PALIGNR:
4804 case X86ISD::VSHLDQ:
4805 case X86ISD::VSRLDQ:
4806 case X86ISD::MOVLHPS:
4807 case X86ISD::MOVHLPS:
4808 case X86ISD::MOVSHDUP:
4809 case X86ISD::MOVSLDUP:
4810 case X86ISD::MOVDDUP:
4811 case X86ISD::MOVSS:
4812 case X86ISD::MOVSD:
4813 case X86ISD::UNPCKL:
4814 case X86ISD::UNPCKH:
4815 case X86ISD::VBROADCAST:
4816 case X86ISD::VPERMILPI:
4817 case X86ISD::VPERMILPV:
4818 case X86ISD::VPERM2X128:
4819 case X86ISD::SHUF128:
4820 case X86ISD::VPERMIL2:
4821 case X86ISD::VPERMI:
4822 case X86ISD::VPPERM:
4823 case X86ISD::VPERMV:
4824 case X86ISD::VPERMV3:
4825 case X86ISD::VZEXT_MOVL:
4826 return true;
4827 }
4828}
4829
4830static bool isTargetShuffleVariableMask(unsigned Opcode) {
4831 switch (Opcode) {
4832 default: return false;
4833 // Target Shuffles.
4834 case X86ISD::PSHUFB:
4835 case X86ISD::VPERMILPV:
4836 case X86ISD::VPERMIL2:
4837 case X86ISD::VPPERM:
4838 case X86ISD::VPERMV:
4839 case X86ISD::VPERMV3:
4840 return true;
4841 // 'Faux' Target Shuffles.
4842 case ISD::OR:
4843 case ISD::AND:
4844 case X86ISD::ANDNP:
4845 return true;
4846 }
4847}
4848
4849static bool isTargetShuffleSplat(SDValue Op) {
4850 unsigned Opcode = Op.getOpcode();
4851 if (Opcode == ISD::EXTRACT_SUBVECTOR)
4852 return isTargetShuffleSplat(Op.getOperand(0));
4853 return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
4854}
4855
4856SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4857 MachineFunction &MF = DAG.getMachineFunction();
4858 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4859 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4860 int ReturnAddrIndex = FuncInfo->getRAIndex();
4861
4862 if (ReturnAddrIndex == 0) {
4863 // Set up a frame object for the return address.
4864 unsigned SlotSize = RegInfo->getSlotSize();
4865 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4866 -(int64_t)SlotSize,
4867 false);
4868 FuncInfo->setRAIndex(ReturnAddrIndex);
4869 }
4870
4871 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4872}
4873
4874bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4875 bool hasSymbolicDisplacement) {
4876 // Offset should fit into 32 bit immediate field.
4877 if (!isInt<32>(Offset))
4878 return false;
4879
4880 // If we don't have a symbolic displacement - we don't have any extra
4881 // restrictions.
4882 if (!hasSymbolicDisplacement)
4883 return true;
4884
4885 // FIXME: Some tweaks might be needed for medium code model.
4886 if (M != CodeModel::Small && M != CodeModel::Kernel)
4887 return false;
4888
4889 // For small code model we assume that latest object is 16MB before end of 31
4890 // bits boundary. We may also accept pretty large negative constants knowing
4891 // that all objects are in the positive half of address space.
4892 if (M == CodeModel::Small && Offset < 16*1024*1024)
4893 return true;
4894
4895 // For kernel code model we know that all object resist in the negative half
4896 // of 32bits address space. We may not accept negative offsets, since they may
4897 // be just off and we may accept pretty large positive ones.
4898 if (M == CodeModel::Kernel && Offset >= 0)
4899 return true;
4900
4901 return false;
4902}
4903
4904/// Determines whether the callee is required to pop its own arguments.
4905/// Callee pop is necessary to support tail calls.
4906bool X86::isCalleePop(CallingConv::ID CallingConv,
4907 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4908 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4909 // can guarantee TCO.
4910 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4911 return true;
4912
4913 switch (CallingConv) {
4914 default:
4915 return false;
4916 case CallingConv::X86_StdCall:
4917 case CallingConv::X86_FastCall:
4918 case CallingConv::X86_ThisCall:
4919 case CallingConv::X86_VectorCall:
4920 return !is64Bit;
4921 }
4922}
4923
4924/// Return true if the condition is an signed comparison operation.
4925static bool isX86CCSigned(unsigned X86CC) {
4926 switch (X86CC) {
4927 default:
4928 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4928)
;
4929 case X86::COND_E:
4930 case X86::COND_NE:
4931 case X86::COND_B:
4932 case X86::COND_A:
4933 case X86::COND_BE:
4934 case X86::COND_AE:
4935 return false;
4936 case X86::COND_G:
4937 case X86::COND_GE:
4938 case X86::COND_L:
4939 case X86::COND_LE:
4940 return true;
4941 }
4942}
4943
4944static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4945 switch (SetCCOpcode) {
4946 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4946)
;
4947 case ISD::SETEQ: return X86::COND_E;
4948 case ISD::SETGT: return X86::COND_G;
4949 case ISD::SETGE: return X86::COND_GE;
4950 case ISD::SETLT: return X86::COND_L;
4951 case ISD::SETLE: return X86::COND_LE;
4952 case ISD::SETNE: return X86::COND_NE;
4953 case ISD::SETULT: return X86::COND_B;
4954 case ISD::SETUGT: return X86::COND_A;
4955 case ISD::SETULE: return X86::COND_BE;
4956 case ISD::SETUGE: return X86::COND_AE;
4957 }
4958}
4959
4960/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4961/// condition code, returning the condition code and the LHS/RHS of the
4962/// comparison to make.
4963static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4964 bool isFP, SDValue &LHS, SDValue &RHS,
4965 SelectionDAG &DAG) {
4966 if (!isFP) {
4967 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4968 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4969 // X > -1 -> X == 0, jump !sign.
4970 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4971 return X86::COND_NS;
4972 }
4973 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4974 // X < 0 -> X == 0, jump on sign.
4975 return X86::COND_S;
4976 }
4977 if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
4978 // X >= 0 -> X == 0, jump on !sign.
4979 return X86::COND_NS;
4980 }
4981 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
4982 // X < 1 -> X <= 0
4983 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4984 return X86::COND_LE;
4985 }
4986 }
4987
4988 return TranslateIntegerX86CC(SetCCOpcode);
4989 }
4990
4991 // First determine if it is required or is profitable to flip the operands.
4992
4993 // If LHS is a foldable load, but RHS is not, flip the condition.
4994 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4995 !ISD::isNON_EXTLoad(RHS.getNode())) {
4996 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4997 std::swap(LHS, RHS);
4998 }
4999
5000 switch (SetCCOpcode) {
5001 default: break;
5002 case ISD::SETOLT:
5003 case ISD::SETOLE:
5004 case ISD::SETUGT:
5005 case ISD::SETUGE:
5006 std::swap(LHS, RHS);
5007 break;
5008 }
5009
5010 // On a floating point condition, the flags are set as follows:
5011 // ZF PF CF op
5012 // 0 | 0 | 0 | X > Y
5013 // 0 | 0 | 1 | X < Y
5014 // 1 | 0 | 0 | X == Y
5015 // 1 | 1 | 1 | unordered
5016 switch (SetCCOpcode) {
5017 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5017)
;
5018 case ISD::SETUEQ:
5019 case ISD::SETEQ: return X86::COND_E;
5020 case ISD::SETOLT: // flipped
5021 case ISD::SETOGT:
5022 case ISD::SETGT: return X86::COND_A;
5023 case ISD::SETOLE: // flipped
5024 case ISD::SETOGE:
5025 case ISD::SETGE: return X86::COND_AE;
5026 case ISD::SETUGT: // flipped
5027 case ISD::SETULT:
5028 case ISD::SETLT: return X86::COND_B;
5029 case ISD::SETUGE: // flipped
5030 case ISD::SETULE:
5031 case ISD::SETLE: return X86::COND_BE;
5032 case ISD::SETONE:
5033 case ISD::SETNE: return X86::COND_NE;
5034 case ISD::SETUO: return X86::COND_P;
5035 case ISD::SETO: return X86::COND_NP;
5036 case ISD::SETOEQ:
5037 case ISD::SETUNE: return X86::COND_INVALID;
5038 }
5039}
5040
5041/// Is there a floating point cmov for the specific X86 condition code?
5042/// Current x86 isa includes the following FP cmov instructions:
5043/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5044static bool hasFPCMov(unsigned X86CC) {
5045 switch (X86CC) {
5046 default:
5047 return false;
5048 case X86::COND_B:
5049 case X86::COND_BE:
5050 case X86::COND_E:
5051 case X86::COND_P:
5052 case X86::COND_A:
5053 case X86::COND_AE:
5054 case X86::COND_NE:
5055 case X86::COND_NP:
5056 return true;
5057 }
5058}
5059
5060
5061bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5062 const CallInst &I,
5063 MachineFunction &MF,
5064 unsigned Intrinsic) const {
5065 Info.flags = MachineMemOperand::MONone;
5066 Info.offset = 0;
5067
5068 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5069 if (!IntrData) {
5070 switch (Intrinsic) {
5071 case Intrinsic::x86_aesenc128kl:
5072 case Intrinsic::x86_aesdec128kl:
5073 Info.opc = ISD::INTRINSIC_W_CHAIN;
5074 Info.ptrVal = I.getArgOperand(1);
5075 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5076 Info.align = Align(1);
5077 Info.flags |= MachineMemOperand::MOLoad;
5078 return true;
5079 case Intrinsic::x86_aesenc256kl:
5080 case Intrinsic::x86_aesdec256kl:
5081 Info.opc = ISD::INTRINSIC_W_CHAIN;
5082 Info.ptrVal = I.getArgOperand(1);
5083 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5084 Info.align = Align(1);
5085 Info.flags |= MachineMemOperand::MOLoad;
5086 return true;
5087 case Intrinsic::x86_aesencwide128kl:
5088 case Intrinsic::x86_aesdecwide128kl:
5089 Info.opc = ISD::INTRINSIC_W_CHAIN;
5090 Info.ptrVal = I.getArgOperand(0);
5091 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5092 Info.align = Align(1);
5093 Info.flags |= MachineMemOperand::MOLoad;
5094 return true;
5095 case Intrinsic::x86_aesencwide256kl:
5096 case Intrinsic::x86_aesdecwide256kl:
5097 Info.opc = ISD::INTRINSIC_W_CHAIN;
5098 Info.ptrVal = I.getArgOperand(0);
5099 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5100 Info.align = Align(1);
5101 Info.flags |= MachineMemOperand::MOLoad;
5102 return true;
5103 }
5104 return false;
5105 }
5106
5107 switch (IntrData->Type) {
5108 case TRUNCATE_TO_MEM_VI8:
5109 case TRUNCATE_TO_MEM_VI16:
5110 case TRUNCATE_TO_MEM_VI32: {
5111 Info.opc = ISD::INTRINSIC_VOID;
5112 Info.ptrVal = I.getArgOperand(0);
5113 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5114 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5115 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5116 ScalarVT = MVT::i8;
5117 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5118 ScalarVT = MVT::i16;
5119 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5120 ScalarVT = MVT::i32;
5121
5122 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5123 Info.align = Align(1);
5124 Info.flags |= MachineMemOperand::MOStore;
5125 break;
5126 }
5127 case GATHER:
5128 case GATHER_AVX2: {
5129 Info.opc = ISD::INTRINSIC_W_CHAIN;
5130 Info.ptrVal = nullptr;
5131 MVT DataVT = MVT::getVT(I.getType());
5132 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5133 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5134 IndexVT.getVectorNumElements());
5135 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5136 Info.align = Align(1);
5137 Info.flags |= MachineMemOperand::MOLoad;
5138 break;
5139 }
5140 case SCATTER: {
5141 Info.opc = ISD::INTRINSIC_VOID;
5142 Info.ptrVal = nullptr;
5143 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5144 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5145 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5146 IndexVT.getVectorNumElements());
5147 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5148 Info.align = Align(1);
5149 Info.flags |= MachineMemOperand::MOStore;
5150 break;
5151 }
5152 default:
5153 return false;
5154 }
5155
5156 return true;
5157}
5158
5159/// Returns true if the target can instruction select the
5160/// specified FP immediate natively. If false, the legalizer will
5161/// materialize the FP immediate as a load from a constant pool.
5162bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5163 bool ForCodeSize) const {
5164 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
5165 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
5166 return true;
5167 }
5168 return false;
5169}
5170
5171bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5172 ISD::LoadExtType ExtTy,
5173 EVT NewVT) const {
5174 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")((cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow"
) ? static_cast<void> (0) : __assert_fail ("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5174, __PRETTY_FUNCTION__))
;
5175
5176 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5177 // relocation target a movq or addq instruction: don't let the load shrink.
5178 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5179 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5180 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5181 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5182
5183 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5184 // those uses are extracted directly into a store, then the extract + store
5185 // can be store-folded. Therefore, it's probably not worth splitting the load.
5186 EVT VT = Load->getValueType(0);
5187 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5188 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5189 // Skip uses of the chain value. Result 0 of the node is the load value.
5190 if (UI.getUse().getResNo() != 0)
5191 continue;
5192
5193 // If this use is not an extract + store, it's probably worth splitting.
5194 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5195 UI->use_begin()->getOpcode() != ISD::STORE)
5196 return true;
5197 }
5198 // All non-chain uses are extract + store.
5199 return false;
5200 }
5201
5202 return true;
5203}
5204
5205/// Returns true if it is beneficial to convert a load of a constant
5206/// to just the constant itself.
5207bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5208 Type *Ty) const {
5209 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5209, __PRETTY_FUNCTION__))
;
5210
5211 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5212 if (BitSize == 0 || BitSize > 64)
5213 return false;
5214 return true;
5215}
5216
5217bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5218 // If we are using XMM registers in the ABI and the condition of the select is
5219 // a floating-point compare and we have blendv or conditional move, then it is
5220 // cheaper to select instead of doing a cross-register move and creating a
5221 // load that depends on the compare result.
5222 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5223 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5224}
5225
5226bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5227 // TODO: It might be a win to ease or lift this restriction, but the generic
5228 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5229 if (VT.isVector() && Subtarget.hasAVX512())
5230 return false;
5231
5232 return true;
5233}
5234
5235bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5236 SDValue C) const {
5237 // TODO: We handle scalars using custom code, but generic combining could make
5238 // that unnecessary.
5239 APInt MulC;
5240 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5241 return false;
5242
5243 // Find the type this will be legalized too. Otherwise we might prematurely
5244 // convert this to shl+add/sub and then still have to type legalize those ops.
5245 // Another choice would be to defer the decision for illegal types until
5246 // after type legalization. But constant splat vectors of i64 can't make it
5247 // through type legalization on 32-bit targets so we would need to special
5248 // case vXi64.
5249 while (getTypeAction(Context, VT) != TypeLegal)
5250 VT = getTypeToTransformTo(Context, VT);
5251
5252 // If vector multiply is legal, assume that's faster than shl + add/sub.
5253 // TODO: Multiply is a complex op with higher latency and lower throughput in
5254 // most implementations, so this check could be loosened based on type
5255 // and/or a CPU attribute.
5256 if (isOperationLegal(ISD::MUL, VT))
5257 return false;
5258
5259 // shl+add, shl+sub, shl+add+neg
5260 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5261 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5262}
5263
5264bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5265 unsigned Index) const {
5266 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5267 return false;
5268
5269 // Mask vectors support all subregister combinations and operations that
5270 // extract half of vector.
5271 if (ResVT.getVectorElementType() == MVT::i1)
5272 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5273 (Index == ResVT.getVectorNumElements()));
5274
5275 return (Index % ResVT.getVectorNumElements()) == 0;
5276}
5277
5278bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5279 unsigned Opc = VecOp.getOpcode();
5280
5281 // Assume target opcodes can't be scalarized.
5282 // TODO - do we have any exceptions?
5283 if (Opc >= ISD::BUILTIN_OP_END)
5284 return false;
5285
5286 // If the vector op is not supported, try to convert to scalar.
5287 EVT VecVT = VecOp.getValueType();
5288 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5289 return true;
5290
5291 // If the vector op is supported, but the scalar op is not, the transform may
5292 // not be worthwhile.
5293 EVT ScalarVT = VecVT.getScalarType();
5294 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5295}
5296
5297bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5298 bool) const {
5299 // TODO: Allow vectors?
5300 if (VT.isVector())
5301 return false;
5302 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5303}
5304
5305bool X86TargetLowering::isCheapToSpeculateCttz() const {
5306 // Speculate cttz only if we can directly use TZCNT.
5307 return Subtarget.hasBMI();
5308}
5309
5310bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5311 // Speculate ctlz only if we can directly use LZCNT.
5312 return Subtarget.hasLZCNT();
5313}
5314
5315bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5316 const SelectionDAG &DAG,
5317 const MachineMemOperand &MMO) const {
5318 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5319 BitcastVT.getVectorElementType() == MVT::i1)
5320 return false;
5321
5322 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5323 return false;
5324
5325 // If both types are legal vectors, it's always ok to convert them.
5326 if (LoadVT.isVector() && BitcastVT.isVector() &&
5327 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5328 return true;
5329
5330 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5331}
5332
5333bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5334 const SelectionDAG &DAG) const {
5335 // Do not merge to float value size (128 bytes) if no implicit
5336 // float attribute is set.
5337 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
5338 Attribute::NoImplicitFloat);
5339
5340 if (NoFloat) {
5341 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5342 return (MemVT.getSizeInBits() <= MaxIntSize);
5343 }
5344 // Make sure we don't merge greater than our preferred vector
5345 // width.
5346 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5347 return false;
5348
5349 // Don't merge to x86 amx tile, as we only map MVT::v256i32
5350 // to x86 amx tile on amx intrinsics.
5351 if (MemVT == MVT::v256i32)
5352 return false;
5353
5354 return true;
5355}
5356
5357bool X86TargetLowering::isCtlzFast() const {
5358 return Subtarget.hasFastLZCNT();
5359}
5360
5361bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5362 const Instruction &AndI) const {
5363 return true;
5364}
5365
5366bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5367 EVT VT = Y.getValueType();
5368
5369 if (VT.isVector())
5370 return false;
5371
5372 if (!Subtarget.hasBMI())
5373 return false;
5374
5375 // There are only 32-bit and 64-bit forms for 'andn'.
5376 if (VT != MVT::i32 && VT != MVT::i64)
5377 return false;
5378
5379 return !isa<ConstantSDNode>(Y);
5380}
5381
5382bool X86TargetLowering::hasAndNot(SDValue Y) const {
5383 EVT VT = Y.getValueType();
5384
5385 if (!VT.isVector())
5386 return hasAndNotCompare(Y);
5387
5388 // Vector.
5389
5390 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5391 return false;
5392
5393 if (VT == MVT::v4i32)
5394 return true;
5395
5396 return Subtarget.hasSSE2();
5397}
5398
5399bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5400 return X.getValueType().isScalarInteger(); // 'bt'
5401}
5402
5403bool X86TargetLowering::
5404 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5405 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5406 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5407 SelectionDAG &DAG) const {
5408 // Does baseline recommend not to perform the fold by default?
5409 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5410 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5411 return false;
5412 // For scalars this transform is always beneficial.
5413 if (X.getValueType().isScalarInteger())
5414 return true;
5415 // If all the shift amounts are identical, then transform is beneficial even
5416 // with rudimentary SSE2 shifts.
5417 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5418 return true;
5419 // If we have AVX2 with it's powerful shift operations, then it's also good.
5420 if (Subtarget.hasAVX2())
5421 return true;
5422 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5423 return NewShiftOpcode == ISD::SHL;
5424}
5425
5426bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5427 const SDNode *N, CombineLevel Level) const {
5428 assert(((N->getOpcode() == ISD::SHL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5432, __PRETTY_FUNCTION__))
5429 N->getOperand(0).getOpcode() == ISD::SRL) ||((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5432, __PRETTY_FUNCTION__))
5430 (N->getOpcode() == ISD::SRL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5432, __PRETTY_FUNCTION__))
5431 N->getOperand(0).getOpcode() == ISD::SHL)) &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5432, __PRETTY_FUNCTION__))
5432 "Expected shift-shift mask")((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5432, __PRETTY_FUNCTION__))
;
5433 EVT VT = N->getValueType(0);
5434 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5435 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5436 // Only fold if the shift values are equal - so it folds to AND.
5437 // TODO - we should fold if either is a non-uniform vector but we don't do
5438 // the fold for non-splats yet.
5439 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5440 }
5441 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5442}
5443
5444bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5445 EVT VT = Y.getValueType();
5446
5447 // For vectors, we don't have a preference, but we probably want a mask.
5448 if (VT.isVector())
5449 return false;
5450
5451 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5452 if (VT == MVT::i64 && !Subtarget.is64Bit())
5453 return false;
5454
5455 return true;
5456}
5457
5458bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5459 SDNode *N) const {
5460 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5461 !Subtarget.isOSWindows())
5462 return false;
5463 return true;
5464}
5465
5466bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5467 // Any legal vector type can be splatted more efficiently than
5468 // loading/spilling from memory.
5469 return isTypeLegal(VT);
5470}
5471
5472MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5473 MVT VT = MVT::getIntegerVT(NumBits);
5474 if (isTypeLegal(VT))
5475 return VT;
5476
5477 // PMOVMSKB can handle this.
5478 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5479 return MVT::v16i8;
5480
5481 // VPMOVMSKB can handle this.
5482 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5483 return MVT::v32i8;
5484
5485 // TODO: Allow 64-bit type for 32-bit target.
5486 // TODO: 512-bit types should be allowed, but make sure that those
5487 // cases are handled in combineVectorSizedSetCCEquality().
5488
5489 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5490}
5491
5492/// Val is the undef sentinel value or equal to the specified value.
5493static bool isUndefOrEqual(int Val, int CmpVal) {
5494 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5495}
5496
5497/// Return true if every element in Mask is the undef sentinel value or equal to
5498/// the specified value..
5499static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
5500 return llvm::all_of(Mask, [CmpVal](int M) {
5501 return (M == SM_SentinelUndef) || (M == CmpVal);
5502 });
5503}
5504
5505/// Val is either the undef or zero sentinel value.
5506static bool isUndefOrZero(int Val) {
5507 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5508}
5509
5510/// Return true if every element in Mask, beginning from position Pos and ending
5511/// in Pos+Size is the undef sentinel value.
5512static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5513 return llvm::all_of(Mask.slice(Pos, Size),
5514 [](int M) { return M == SM_SentinelUndef; });
5515}
5516
5517/// Return true if the mask creates a vector whose lower half is undefined.
5518static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5519 unsigned NumElts = Mask.size();
5520 return isUndefInRange(Mask, 0, NumElts / 2);
5521}
5522
5523/// Return true if the mask creates a vector whose upper half is undefined.
5524static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5525 unsigned NumElts = Mask.size();
5526 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5527}
5528
/// Return true if \p Val falls within the half-open range [Low, Hi), i.e.
/// \p Low is included and \p Hi is excluded. An empty range (Low >= Hi)
/// contains no value.
static bool isInRange(int Val, int Low, int Hi) {
  return Val >= Low && Val < Hi;
}
5533
5534/// Return true if the value of any element in Mask falls within the specified
5535/// range (L, H].
5536static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5537 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5538}
5539
5540/// Return true if the value of any element in Mask is the zero sentinel value.
5541static bool isAnyZero(ArrayRef<int> Mask) {
5542 return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
5543}
5544
5545/// Return true if the value of any element in Mask is the zero or undef
5546/// sentinel values.
5547static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
5548 return llvm::any_of(Mask, [](int M) {
5549 return M == SM_SentinelZero || M == SM_SentinelUndef;
5550 });
5551}
5552
5553/// Return true if Val is undef or if its value falls within the
5554/// specified range (L, H].
5555static bool isUndefOrInRange(int Val, int Low, int Hi) {
5556 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5557}
5558
5559/// Return true if every element in Mask is undef or if its value
5560/// falls within the specified range (L, H].
5561static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5562 return llvm::all_of(
5563 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5564}
5565
5566/// Return true if Val is undef, zero or if its value falls within the
5567/// specified range (L, H].
5568static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5569 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5570}
5571
5572/// Return true if every element in Mask is undef, zero or if its value
5573/// falls within the specified range (L, H].
5574static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5575 return llvm::all_of(
5576 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5577}
5578
5579/// Return true if every element in Mask, beginning
5580/// from position Pos and ending in Pos + Size, falls within the specified
5581/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5582static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5583 unsigned Size, int Low, int Step = 1) {
5584 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5585 if (!isUndefOrEqual(Mask[i], Low))
5586 return false;
5587 return true;
5588}
5589
5590/// Return true if every element in Mask, beginning
5591/// from position Pos and ending in Pos+Size, falls within the specified
5592/// sequential range (Low, Low+Size], or is undef or is zero.
5593static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5594 unsigned Size, int Low,
5595 int Step = 1) {
5596 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5597 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5598 return false;
5599 return true;
5600}
5601
5602/// Return true if every element in Mask, beginning
5603/// from position Pos and ending in Pos+Size is undef or is zero.
5604static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5605 unsigned Size) {
5606 return llvm::all_of(Mask.slice(Pos, Size),
5607 [](int M) { return isUndefOrZero(M); });
5608}
5609
5610/// Helper function to test whether a shuffle mask could be
5611/// simplified by widening the elements being shuffled.
5612///
5613/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5614/// leaves it in an unspecified state.
5615///
5616/// NOTE: This must handle normal vector shuffle masks and *target* vector
5617/// shuffle masks. The latter have the special property of a '-2' representing
5618/// a zero-ed lane of a vector.
5619static bool canWidenShuffleElements(ArrayRef<int> Mask,
5620 SmallVectorImpl<int> &WidenedMask) {
5621 WidenedMask.assign(Mask.size() / 2, 0);
5622 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5623 int M0 = Mask[i];
5624 int M1 = Mask[i + 1];
5625
5626 // If both elements are undef, its trivial.
5627 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5628 WidenedMask[i / 2] = SM_SentinelUndef;
5629 continue;
5630 }
5631
5632 // Check for an undef mask and a mask value properly aligned to fit with
5633 // a pair of values. If we find such a case, use the non-undef mask's value.
5634 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5635 WidenedMask[i / 2] = M1 / 2;
5636 continue;
5637 }
5638 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5639 WidenedMask[i / 2] = M0 / 2;
5640 continue;
5641 }
5642
5643 // When zeroing, we need to spread the zeroing across both lanes to widen.
5644 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5645 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5646 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5647 WidenedMask[i / 2] = SM_SentinelZero;
5648 continue;
5649 }
5650 return false;
5651 }
5652
5653 // Finally check if the two mask values are adjacent and aligned with
5654 // a pair.
5655 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5656 WidenedMask[i / 2] = M0 / 2;
5657 continue;
5658 }
5659
5660 // Otherwise we can't safely widen the elements used in this shuffle.
5661 return false;
5662 }
5663 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5664, __PRETTY_FUNCTION__))
5664 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5664, __PRETTY_FUNCTION__))
;
5665
5666 return true;
5667}
5668
5669static bool canWidenShuffleElements(ArrayRef<int> Mask,
5670 const APInt &Zeroable,
5671 bool V2IsZero,
5672 SmallVectorImpl<int> &WidenedMask) {
5673 // Create an alternative mask with info about zeroable elements.
5674 // Here we do not set undef elements as zeroable.
5675 SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
5676 if (V2IsZero) {
5677 assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!")((!Zeroable.isNullValue() && "V2's non-undef elements are used?!"
) ? static_cast<void> (0) : __assert_fail ("!Zeroable.isNullValue() && \"V2's non-undef elements are used?!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5677, __PRETTY_FUNCTION__))
;
5678 for (int i = 0, Size = Mask.size(); i != Size; ++i)
5679 if (Mask[i] != SM_SentinelUndef && Zeroable[i])
5680 ZeroableMask[i] = SM_SentinelZero;
5681 }
5682 return canWidenShuffleElements(ZeroableMask, WidenedMask);
5683}
5684
5685static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5686 SmallVector<int, 32> WidenedMask;
5687 return canWidenShuffleElements(Mask, WidenedMask);
5688}
5689
5690// Attempt to narrow/widen shuffle mask until it matches the target number of
5691// elements.
5692static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
5693 SmallVectorImpl<int> &ScaledMask) {
5694 unsigned NumSrcElts = Mask.size();
5695 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&((((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts
) == 0) && "Illegal shuffle scale factor") ? static_cast
<void> (0) : __assert_fail ("((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) && \"Illegal shuffle scale factor\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5696, __PRETTY_FUNCTION__))
5696 "Illegal shuffle scale factor")((((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts
) == 0) && "Illegal shuffle scale factor") ? static_cast
<void> (0) : __assert_fail ("((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) && \"Illegal shuffle scale factor\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5696, __PRETTY_FUNCTION__))
;
5697
5698 // Narrowing is guaranteed to work.
5699 if (NumDstElts >= NumSrcElts) {
5700 int Scale = NumDstElts / NumSrcElts;
5701 llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask);
5702 return true;
5703 }
5704
5705 // We have to repeat the widening until we reach the target size, but we can
5706 // split out the first widening as it sets up ScaledMask for us.
5707 if (canWidenShuffleElements(Mask, ScaledMask)) {
5708 while (ScaledMask.size() > NumDstElts) {
5709 SmallVector<int, 16> WidenedMask;
5710 if (!canWidenShuffleElements(ScaledMask, WidenedMask))
5711 return false;
5712 ScaledMask = std::move(WidenedMask);
5713 }
5714 return true;
5715 }
5716
5717 return false;
5718}
5719
5720/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5721bool X86::isZeroNode(SDValue Elt) {
5722 return isNullConstant(Elt) || isNullFPConstant(Elt);
5723}
5724
5725// Build a vector of constants.
5726// Use an UNDEF node if MaskElt == -1.
5727// Split 64-bit constants in the 32-bit mode.
5728static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5729 const SDLoc &dl, bool IsMask = false) {
5730
5731 SmallVector<SDValue, 32> Ops;
5732 bool Split = false;
5733
5734 MVT ConstVecVT = VT;
5735 unsigned NumElts = VT.getVectorNumElements();
5736 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5737 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5738 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5739 Split = true;
5740 }
5741
5742 MVT EltVT = ConstVecVT.getVectorElementType();
5743 for (unsigned i = 0; i < NumElts; ++i) {
5744 bool IsUndef = Values[i] < 0 && IsMask;
5745 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5746 DAG.getConstant(Values[i], dl, EltVT);
5747 Ops.push_back(OpNode);
5748 if (Split)
5749 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5750 DAG.getConstant(0, dl, EltVT));
5751 }
5752 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5753 if (Split)
5754 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5755 return ConstsNode;
5756}
5757
5758static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5759 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5760 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5761, __PRETTY_FUNCTION__))
5761 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5761, __PRETTY_FUNCTION__))
;
5762 SmallVector<SDValue, 32> Ops;
5763 bool Split = false;
5764
5765 MVT ConstVecVT = VT;
5766 unsigned NumElts = VT.getVectorNumElements();
5767 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5768 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5769 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5770 Split = true;
5771 }
5772
5773 MVT EltVT = ConstVecVT.getVectorElementType();
5774 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5775 if (Undefs[i]) {
5776 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5777 continue;
5778 }
5779 const APInt &V = Bits[i];
5780 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5780, __PRETTY_FUNCTION__))
;
5781 if (Split) {
5782 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5783 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5784 } else if (EltVT == MVT::f32) {
5785 APFloat FV(APFloat::IEEEsingle(), V);
5786 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5787 } else if (EltVT == MVT::f64) {
5788 APFloat FV(APFloat::IEEEdouble(), V);
5789 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5790 } else {
5791 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5792 }
5793 }
5794
5795 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5796 return DAG.getBitcast(VT, ConstsNode);
5797}
5798
5799/// Returns a vector of specified type with all zero elements.
5800static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5801 SelectionDAG &DAG, const SDLoc &dl) {
5802 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5804, __PRETTY_FUNCTION__))
5803 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5804, __PRETTY_FUNCTION__))
5804 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5804, __PRETTY_FUNCTION__))
;
5805
5806 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5807 // type. This ensures they get CSE'd. But if the integer type is not
5808 // available, use a floating-point +0.0 instead.
5809 SDValue Vec;
5810 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5811 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5812 } else if (VT.isFloatingPoint()) {
5813 Vec = DAG.getConstantFP(+0.0, dl, VT);
5814 } else if (VT.getVectorElementType() == MVT::i1) {
5815 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5816, __PRETTY_FUNCTION__))
5816 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5816, __PRETTY_FUNCTION__))
;
5817 Vec = DAG.getConstant(0, dl, VT);
5818 } else {
5819 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5820 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5821 }
5822 return DAG.getBitcast(VT, Vec);
5823}
5824
5825static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5826 const SDLoc &dl, unsigned vectorWidth) {
5827 EVT VT = Vec.getValueType();
5828 EVT ElVT = VT.getVectorElementType();
5829 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5830 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5831 VT.getVectorNumElements()/Factor);
5832
5833 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5834 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5835 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5835, __PRETTY_FUNCTION__))
;
5836
5837 // This is the index of the first element of the vectorWidth-bit chunk
5838 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5839 IdxVal &= ~(ElemsPerChunk - 1);
5840
5841 // If the input is a buildvector just emit a smaller one.
5842 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5843 return DAG.getBuildVector(ResultVT, dl,
5844 Vec->ops().slice(IdxVal, ElemsPerChunk));
5845
5846 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5847 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5848}
5849
5850/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5851/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5852/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5853/// instructions or a simple subregister reference. Idx is an index in the
5854/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5855/// lowering EXTRACT_VECTOR_ELT operations easier.
5856static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5857 SelectionDAG &DAG, const SDLoc &dl) {
5858 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5859, __PRETTY_FUNCTION__))
5859 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5859, __PRETTY_FUNCTION__))
;
5860 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5861}
5862
5863/// Generate a DAG to grab 256-bits from a 512-bit vector.
5864static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5865 SelectionDAG &DAG, const SDLoc &dl) {
5866 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5866, __PRETTY_FUNCTION__))
;
5867 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5868}
5869
5870static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5871 SelectionDAG &DAG, const SDLoc &dl,
5872 unsigned vectorWidth) {
5873 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5874, __PRETTY_FUNCTION__))
5874 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5874, __PRETTY_FUNCTION__))
;
5875 // Inserting UNDEF is Result
5876 if (Vec.isUndef())
5877 return Result;
5878 EVT VT = Vec.getValueType();
5879 EVT ElVT = VT.getVectorElementType();
5880 EVT ResultVT = Result.getValueType();
5881
5882 // Insert the relevant vectorWidth bits.
5883 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5884 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5884, __PRETTY_FUNCTION__))
;
5885
5886 // This is the index of the first element of the vectorWidth-bit chunk
5887 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5888 IdxVal &= ~(ElemsPerChunk - 1);
5889
5890 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5891 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5892}
5893
5894/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5895/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5896/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5897/// simple superregister reference. Idx is an index in the 128 bits
5898/// we want. It need not be aligned to a 128-bit boundary. That makes
5899/// lowering INSERT_VECTOR_ELT operations easier.
5900static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5901 SelectionDAG &DAG, const SDLoc &dl) {
5902 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5902, __PRETTY_FUNCTION__))
;
5903 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5904}
5905
5906/// Widen a vector to a larger size with the same scalar type, with the new
5907/// elements either zero or undef.
5908static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5909 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5910 const SDLoc &dl) {
5911 assert(Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits() &&((Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits
() && Vec.getValueType().getScalarType() == VT.getScalarType
() && "Unsupported vector widening type") ? static_cast
<void> (0) : __assert_fail ("Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5913, __PRETTY_FUNCTION__))
5912 Vec.getValueType().getScalarType() == VT.getScalarType() &&((Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits
() && Vec.getValueType().getScalarType() == VT.getScalarType
() && "Unsupported vector widening type") ? static_cast
<void> (0) : __assert_fail ("Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-12~++20201211111113+08280c4b734/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5913, __PRETTY_