Bug Summary

File: llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 36022, column 5
Division by zero

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-09-26-161721-17566-1 -x c++ /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/X86/X86ISelLowering.cpp
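
For readers unfamiliar with this class of report: the "Division by zero" diagnostic comes from the analyzer's core checkers (enabled above via -analyzer-checker=core), which flag an integer or floating-point division whose divisor can be zero on some feasible path. The code at line 36022 is not part of the excerpt below, so the following is only a minimal, hypothetical C++ sketch of the pattern such a checker reports and the kind of guard that typically silences it; it is not the actual code from X86ISelLowering.cpp.

    // Hypothetical illustration only -- not the code at X86ISelLowering.cpp:36022.
    unsigned scalePerLane(unsigned NumElts, unsigned NumLanes) {
      // If NumLanes can be 0 on some path the analyzer explores, it reports
      // "Division by zero" at the division below.
      if (NumLanes == 0)          // a guard of this form makes the zero path infeasible
        return 0;
      return NumElts / NumLanes;  // safe here: NumLanes != 0
    }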

/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/IntrinsicLowering.h"
34#include "llvm/CodeGen/MachineFrameInfo.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineJumpTableInfo.h"
38#include "llvm/CodeGen/MachineModuleInfo.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetLowering.h"
41#include "llvm/CodeGen/WinEHFuncInfo.h"
42#include "llvm/IR/CallingConv.h"
43#include "llvm/IR/Constants.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/DiagnosticInfo.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/GlobalAlias.h"
48#include "llvm/IR/GlobalVariable.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Intrinsics.h"
51#include "llvm/MC/MCAsmInfo.h"
52#include "llvm/MC/MCContext.h"
53#include "llvm/MC/MCExpr.h"
54#include "llvm/MC/MCSymbol.h"
55#include "llvm/Support/CommandLine.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/ErrorHandling.h"
58#include "llvm/Support/KnownBits.h"
59#include "llvm/Support/MathExtras.h"
60#include "llvm/Target/TargetOptions.h"
61#include <algorithm>
62#include <bitset>
63#include <cctype>
64#include <numeric>
65using namespace llvm;
66
67#define DEBUG_TYPE "x86-isel"
68
69STATISTIC(NumTailCalls, "Number of tail calls");
70
71static cl::opt<int> ExperimentalPrefLoopAlignment(
72 "x86-experimental-pref-loop-alignment", cl::init(4),
73 cl::desc(
74 "Sets the preferable loop alignment for experiments (as log2 bytes)"
75 "(the last x86-experimental-pref-loop-alignment bits"
76 " of the loop header PC will be 0)."),
77 cl::Hidden);
78
79static cl::opt<bool> MulConstantOptimization(
80 "mul-constant-optimization", cl::init(true),
81 cl::desc("Replace 'mul x, Const' with more effective instructions like "
82 "SHIFT, LEA, etc."),
83 cl::Hidden);
84
85static cl::opt<bool> ExperimentalUnorderedISEL(
86 "x86-experimental-unordered-atomic-isel", cl::init(false),
87 cl::desc("Use LoadSDNode and StoreSDNode instead of "
88 "AtomicSDNode for unordered atomic loads and "
89 "stores respectively."),
90 cl::Hidden);
91
92/// Call this when the user attempts to do something unsupported, like
93/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
94/// report_fatal_error, so calling code should attempt to recover without
95/// crashing.
96static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
97 const char *Msg) {
98 MachineFunction &MF = DAG.getMachineFunction();
99 DAG.getContext()->diagnose(
100 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
101}
102
103X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
104 const X86Subtarget &STI)
105 : TargetLowering(TM), Subtarget(STI) {
106 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
107 X86ScalarSSEf64 = Subtarget.hasSSE2();
108 X86ScalarSSEf32 = Subtarget.hasSSE1();
109 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
110
111 // Set up the TargetLowering object.
112
113 // X86 is weird. It always uses i8 for shift amounts and setcc results.
114 setBooleanContents(ZeroOrOneBooleanContent);
115 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
116 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
117
118 // For 64-bit, since we have so many registers, use the ILP scheduler.
119 // For 32-bit, use the register pressure specific scheduling.
120 // For Atom, always use ILP scheduling.
121 if (Subtarget.isAtom())
122 setSchedulingPreference(Sched::ILP);
123 else if (Subtarget.is64Bit())
124 setSchedulingPreference(Sched::ILP);
125 else
126 setSchedulingPreference(Sched::RegPressure);
127 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
128 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
129
130 // Bypass expensive divides and use cheaper ones.
131 if (TM.getOptLevel() >= CodeGenOpt::Default) {
132 if (Subtarget.hasSlowDivide32())
133 addBypassSlowDiv(32, 8);
134 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
135 addBypassSlowDiv(64, 32);
136 }
137
138 if (Subtarget.isTargetWindowsMSVC() ||
139 Subtarget.isTargetWindowsItanium()) {
140 // Setup Windows compiler runtime calls.
141 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
142 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
143 setLibcallName(RTLIB::SREM_I64, "_allrem");
144 setLibcallName(RTLIB::UREM_I64, "_aullrem");
145 setLibcallName(RTLIB::MUL_I64, "_allmul");
146 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
147 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
148 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
149 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
150 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
151 }
152
153 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
154 // MSVCRT doesn't have powi; fall back to pow
155 setLibcallName(RTLIB::POWI_F32, nullptr);
156 setLibcallName(RTLIB::POWI_F64, nullptr);
157 }
158
159 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
160 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
161 // FIXME: Should we be limiting the atomic size on other configs? Default is
162 // 1024.
163 if (!Subtarget.hasCmpxchg8b())
164 setMaxAtomicSizeInBitsSupported(32);
165
166 // Set up the register classes.
167 addRegisterClass(MVT::i8, &X86::GR8RegClass);
168 addRegisterClass(MVT::i16, &X86::GR16RegClass);
169 addRegisterClass(MVT::i32, &X86::GR32RegClass);
170 if (Subtarget.is64Bit())
171 addRegisterClass(MVT::i64, &X86::GR64RegClass);
172
173 for (MVT VT : MVT::integer_valuetypes())
174 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
175
176 // We don't accept any truncstore of integer registers.
177 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
178 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
179 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
180 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
181 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
182 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
183
184 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
185
186 // SETOEQ and SETUNE require checking two conditions.
187 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
188 setCondCodeAction(ISD::SETOEQ, VT, Expand);
189 setCondCodeAction(ISD::SETUNE, VT, Expand);
190 }
191
192 // Integer absolute.
193 if (Subtarget.hasCMov()) {
194 setOperationAction(ISD::ABS , MVT::i16 , Custom);
195 setOperationAction(ISD::ABS , MVT::i32 , Custom);
196 if (Subtarget.is64Bit())
197 setOperationAction(ISD::ABS , MVT::i64 , Custom);
198 }
199
200 // Funnel shifts.
201 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
202 // For slow shld targets we only lower for code size.
203 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
204
205 setOperationAction(ShiftOp , MVT::i8 , Custom);
206 setOperationAction(ShiftOp , MVT::i16 , Custom);
207 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
208 if (Subtarget.is64Bit())
209 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
210 }
211
212 if (!Subtarget.useSoftFloat()) {
213 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
214 // operation.
215 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
216 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
217 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
218 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
219 // We have an algorithm for SSE2, and we turn this into a 64-bit
220 // FILD or VCVTUSI2SS/SD for other targets.
221 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
222 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
223 // We have an algorithm for SSE2->double, and we turn this into a
224 // 64-bit FILD followed by conditional FADD for other targets.
225 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
226 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
227
228 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
229 // this operation.
230 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
231 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
232 // SSE has no i16 to fp conversion, only i32. We promote in the handler
233 // to allow f80 to use i16 and f64 to use i16 with sse1 only
234 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
235 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
236 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
237 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
238 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
239 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
240 // are Legal, f80 is custom lowered.
241 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
242 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
243
244 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
245 // this operation.
246 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
247 // FIXME: This doesn't generate invalid exception when it should. PR44019.
248 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
249 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
250 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
251 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
252 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
253 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
254 // are Legal, f80 is custom lowered.
255 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
256 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
257
258 // Handle FP_TO_UINT by promoting the destination to a larger signed
259 // conversion.
260 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
261 // FIXME: This doesn't generate invalid exception when it should. PR44019.
262 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
263 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
264 // FIXME: This doesn't generate invalid exception when it should. PR44019.
265 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
266 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
267 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
268 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
269 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
270
271 setOperationAction(ISD::LRINT, MVT::f32, Custom);
272 setOperationAction(ISD::LRINT, MVT::f64, Custom);
273 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
274 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
275
276 if (!Subtarget.is64Bit()) {
277 setOperationAction(ISD::LRINT, MVT::i64, Custom);
278 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
279 }
280 }
281
282 // Handle address space casts between mixed sized pointers.
283 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
284 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
285
286 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
287 if (!X86ScalarSSEf64) {
288 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
289 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
290 if (Subtarget.is64Bit()) {
291 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
292 // Without SSE, i64->f64 goes through memory.
293 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
294 }
295 } else if (!Subtarget.is64Bit())
296 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
297
298 // Scalar integer divide and remainder are lowered to use operations that
299 // produce two results, to match the available instructions. This exposes
300 // the two-result form to trivial CSE, which is able to combine x/y and x%y
301 // into a single instruction.
302 //
303 // Scalar integer multiply-high is also lowered to use two-result
304 // operations, to match the available instructions. However, plain multiply
305 // (low) operations are left as Legal, as there are single-result
306 // instructions for this in x86. Using the two-result multiply instructions
307 // when both high and low results are needed must be arranged by dagcombine.
308 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
309 setOperationAction(ISD::MULHS, VT, Expand);
310 setOperationAction(ISD::MULHU, VT, Expand);
311 setOperationAction(ISD::SDIV, VT, Expand);
312 setOperationAction(ISD::UDIV, VT, Expand);
313 setOperationAction(ISD::SREM, VT, Expand);
314 setOperationAction(ISD::UREM, VT, Expand);
315 }
316
317 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
318 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
319 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
320 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
321 setOperationAction(ISD::BR_CC, VT, Expand);
322 setOperationAction(ISD::SELECT_CC, VT, Expand);
323 }
324 if (Subtarget.is64Bit())
325 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
326 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
327 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
328 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
329
330 setOperationAction(ISD::FREM , MVT::f32 , Expand);
331 setOperationAction(ISD::FREM , MVT::f64 , Expand);
332 setOperationAction(ISD::FREM , MVT::f80 , Expand);
333 setOperationAction(ISD::FREM , MVT::f128 , Expand);
334 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
335
336 // Promote the i8 variants and force them on up to i32 which has a shorter
337 // encoding.
338 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
339 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
340 if (!Subtarget.hasBMI()) {
341 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
342 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
343 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
344 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
345 if (Subtarget.is64Bit()) {
346 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
347 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
348 }
349 }
350
351 if (Subtarget.hasLZCNT()) {
352 // When promoting the i8 variants, force them to i32 for a shorter
353 // encoding.
354 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
355 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
356 } else {
357 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
358 if (VT == MVT::i64 && !Subtarget.is64Bit())
359 continue;
360 setOperationAction(ISD::CTLZ , VT, Custom);
361 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
362 }
363 }
364
365 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
366 ISD::STRICT_FP_TO_FP16}) {
367 // Special handling for half-precision floating point conversions.
368 // If we don't have F16C support, then lower half float conversions
369 // into library calls.
370 setOperationAction(
371 Op, MVT::f32,
372 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
373 // There's never any support for operations beyond MVT::f32.
374 setOperationAction(Op, MVT::f64, Expand);
375 setOperationAction(Op, MVT::f80, Expand);
376 setOperationAction(Op, MVT::f128, Expand);
377 }
378
379 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
380 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
381 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
382 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
383 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
384 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
385 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
386 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
387
388 setOperationAction(ISD::PARITY, MVT::i8, Custom);
389 if (Subtarget.hasPOPCNT()) {
390 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
391 } else {
392 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
393 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
394 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
395 if (Subtarget.is64Bit())
396 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
397 else
398 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
399
400 setOperationAction(ISD::PARITY, MVT::i16, Custom);
401 setOperationAction(ISD::PARITY, MVT::i32, Custom);
402 if (Subtarget.is64Bit())
403 setOperationAction(ISD::PARITY, MVT::i64, Custom);
404 }
405
406 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
407
408 if (!Subtarget.hasMOVBE())
409 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
410
411 // X86 wants to expand cmov itself.
412 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
413 setOperationAction(ISD::SELECT, VT, Custom);
414 setOperationAction(ISD::SETCC, VT, Custom);
415 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
416 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
417 }
418 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
419 if (VT == MVT::i64 && !Subtarget.is64Bit())
420 continue;
421 setOperationAction(ISD::SELECT, VT, Custom);
422 setOperationAction(ISD::SETCC, VT, Custom);
423 }
424
425 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
426 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
427 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
428
429 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
430 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
431 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
432 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
433 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
434 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
435 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
436 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
437
438 // Darwin ABI issue.
439 for (auto VT : { MVT::i32, MVT::i64 }) {
440 if (VT == MVT::i64 && !Subtarget.is64Bit())
441 continue;
442 setOperationAction(ISD::ConstantPool , VT, Custom);
443 setOperationAction(ISD::JumpTable , VT, Custom);
444 setOperationAction(ISD::GlobalAddress , VT, Custom);
445 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
446 setOperationAction(ISD::ExternalSymbol , VT, Custom);
447 setOperationAction(ISD::BlockAddress , VT, Custom);
448 }
449
450 // 64-bit shl, sra, srl (iff 32-bit x86)
451 for (auto VT : { MVT::i32, MVT::i64 }) {
452 if (VT == MVT::i64 && !Subtarget.is64Bit())
453 continue;
454 setOperationAction(ISD::SHL_PARTS, VT, Custom);
455 setOperationAction(ISD::SRA_PARTS, VT, Custom);
456 setOperationAction(ISD::SRL_PARTS, VT, Custom);
457 }
458
459 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
460 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
461
462 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
463
464 // Expand certain atomics
465 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
466 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
467 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
468 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
469 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
470 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
471 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
472 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
473 }
474
475 if (!Subtarget.is64Bit())
476 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
477
478 if (Subtarget.hasCmpxchg16b()) {
479 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
480 }
481
482 // FIXME - use subtarget debug flags
483 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
484 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
485 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
486 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
487 }
488
489 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
490 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
491
492 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
493 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
494
495 setOperationAction(ISD::TRAP, MVT::Other, Legal);
496 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
497
498 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
499 setOperationAction(ISD::VASTART , MVT::Other, Custom);
500 setOperationAction(ISD::VAEND , MVT::Other, Expand);
501 bool Is64Bit = Subtarget.is64Bit();
502 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
503 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
504
505 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
506 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
507
508 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
509
510 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
511 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
512 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
513
514 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
515 // f32 and f64 use SSE.
516 // Set up the FP register classes.
517 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
518 : &X86::FR32RegClass);
519 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
520 : &X86::FR64RegClass);
521
522 // Disable f32->f64 extload as we can only generate this in one instruction
523 // under optsize. So it's easier to pattern match (fpext (load)) for that
524 // case instead of needing to emit 2 instructions for extload in the
525 // non-optsize case.
526 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
527
528 for (auto VT : { MVT::f32, MVT::f64 }) {
529 // Use ANDPD to simulate FABS.
530 setOperationAction(ISD::FABS, VT, Custom);
531
532 // Use XORP to simulate FNEG.
533 setOperationAction(ISD::FNEG, VT, Custom);
534
535 // Use ANDPD and ORPD to simulate FCOPYSIGN.
536 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
537
538 // These might be better off as horizontal vector ops.
539 setOperationAction(ISD::FADD, VT, Custom);
540 setOperationAction(ISD::FSUB, VT, Custom);
541
542 // We don't support sin/cos/fmod
543 setOperationAction(ISD::FSIN , VT, Expand);
544 setOperationAction(ISD::FCOS , VT, Expand);
545 setOperationAction(ISD::FSINCOS, VT, Expand);
546 }
547
548 // Lower this to MOVMSK plus an AND.
549 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
550 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
551
552 } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
553 (UseX87 || Is64Bit)) {
554 // Use SSE for f32, x87 for f64.
555 // Set up the FP register classes.
556 addRegisterClass(MVT::f32, &X86::FR32RegClass);
557 if (UseX87)
558 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
559
560 // Use ANDPS to simulate FABS.
561 setOperationAction(ISD::FABS , MVT::f32, Custom);
562
563 // Use XORP to simulate FNEG.
564 setOperationAction(ISD::FNEG , MVT::f32, Custom);
565
566 if (UseX87)
567 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
568
569 // Use ANDPS and ORPS to simulate FCOPYSIGN.
570 if (UseX87)
571 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
572 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
573
574 // We don't support sin/cos/fmod
575 setOperationAction(ISD::FSIN , MVT::f32, Expand);
576 setOperationAction(ISD::FCOS , MVT::f32, Expand);
577 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
578
579 if (UseX87) {
580 // Always expand sin/cos functions even though x87 has an instruction.
581 setOperationAction(ISD::FSIN, MVT::f64, Expand);
582 setOperationAction(ISD::FCOS, MVT::f64, Expand);
583 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
584 }
585 } else if (UseX87) {
586 // f32 and f64 in x87.
587 // Set up the FP register classes.
588 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
589 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
590
591 for (auto VT : { MVT::f32, MVT::f64 }) {
592 setOperationAction(ISD::UNDEF, VT, Expand);
593 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
594
595 // Always expand sin/cos functions even though x87 has an instruction.
596 setOperationAction(ISD::FSIN , VT, Expand);
597 setOperationAction(ISD::FCOS , VT, Expand);
598 setOperationAction(ISD::FSINCOS, VT, Expand);
599 }
600 }
601
602 // Expand FP32 immediates into loads from the stack, save special cases.
603 if (isTypeLegal(MVT::f32)) {
604 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
605 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
606 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
607 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
608 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
609 } else // SSE immediates.
610 addLegalFPImmediate(APFloat(+0.0f)); // xorps
611 }
612 // Expand FP64 immediates into loads from the stack, save special cases.
613 if (isTypeLegal(MVT::f64)) {
614 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
615 addLegalFPImmediate(APFloat(+0.0)); // FLD0
616 addLegalFPImmediate(APFloat(+1.0)); // FLD1
617 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
618 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
619 } else // SSE immediates.
620 addLegalFPImmediate(APFloat(+0.0)); // xorpd
621 }
622 // Handle constrained floating-point operations of scalar.
623 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
624 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
625 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
626 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
627 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
628 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
629 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
630 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
631 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
632 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
633 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
634 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
635 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
636
637 // We don't support FMA.
638 setOperationAction(ISD::FMA, MVT::f64, Expand);
639 setOperationAction(ISD::FMA, MVT::f32, Expand);
640
641 // f80 always uses X87.
642 if (UseX87) {
643 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
644 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
645 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
646 {
647 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
648 addLegalFPImmediate(TmpFlt); // FLD0
649 TmpFlt.changeSign();
650 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
651
652 bool ignored;
653 APFloat TmpFlt2(+1.0);
654 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
655 &ignored);
656 addLegalFPImmediate(TmpFlt2); // FLD1
657 TmpFlt2.changeSign();
658 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
659 }
660
661 // Always expand sin/cos functions even though x87 has an instruction.
662 setOperationAction(ISD::FSIN , MVT::f80, Expand);
663 setOperationAction(ISD::FCOS , MVT::f80, Expand);
664 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
665
666 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
667 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
668 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
669 setOperationAction(ISD::FRINT, MVT::f80, Expand);
670 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
671 setOperationAction(ISD::FMA, MVT::f80, Expand);
672 setOperationAction(ISD::LROUND, MVT::f80, Expand);
673 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
674 setOperationAction(ISD::LRINT, MVT::f80, Custom);
675 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
676
677 // Handle constrained floating-point operations of scalar.
678 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
679 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
680 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
681 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
682 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
683 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
684 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
685 // as Custom.
686 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
687 }
688
689 // f128 uses xmm registers, but most operations require libcalls.
690 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
691 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
692 : &X86::VR128RegClass);
693
694 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
695
696 setOperationAction(ISD::FADD, MVT::f128, LibCall);
697 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
698 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
699 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
700 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
701 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
702 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
703 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
704 setOperationAction(ISD::FMA, MVT::f128, LibCall);
705 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
706
707 setOperationAction(ISD::FABS, MVT::f128, Custom);
708 setOperationAction(ISD::FNEG, MVT::f128, Custom);
709 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
710
711 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
712 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
713 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
714 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
715 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
716 // No STRICT_FSINCOS
717 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
718 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
719
720 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
721 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
722 // We need to custom handle any FP_ROUND with an f128 input, but
723 // LegalizeDAG uses the result type to know when to run a custom handler.
724 // So we have to list all legal floating point result types here.
725 if (isTypeLegal(MVT::f32)) {
726 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
727 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
728 }
729 if (isTypeLegal(MVT::f64)) {
730 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
731 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
732 }
733 if (isTypeLegal(MVT::f80)) {
734 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
735 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
736 }
737
738 setOperationAction(ISD::SETCC, MVT::f128, Custom);
739
740 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
741 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
742 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
743 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
744 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
745 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
746 }
747
748 // Always use a library call for pow.
749 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
750 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
751 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
752 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
753
754 setOperationAction(ISD::FLOG, MVT::f80, Expand);
755 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
756 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
757 setOperationAction(ISD::FEXP, MVT::f80, Expand);
758 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
759 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
760 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
761
762 // Some FP actions are always expanded for vector types.
763 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
764 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
765 setOperationAction(ISD::FSIN, VT, Expand);
766 setOperationAction(ISD::FSINCOS, VT, Expand);
767 setOperationAction(ISD::FCOS, VT, Expand);
768 setOperationAction(ISD::FREM, VT, Expand);
769 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
770 setOperationAction(ISD::FPOW, VT, Expand);
771 setOperationAction(ISD::FLOG, VT, Expand);
772 setOperationAction(ISD::FLOG2, VT, Expand);
773 setOperationAction(ISD::FLOG10, VT, Expand);
774 setOperationAction(ISD::FEXP, VT, Expand);
775 setOperationAction(ISD::FEXP2, VT, Expand);
776 }
777
778 // First set operation action for all vector types to either promote
779 // (for widening) or expand (for scalarization). Then we will selectively
780 // turn on ones that can be effectively codegen'd.
781 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
782 setOperationAction(ISD::SDIV, VT, Expand);
783 setOperationAction(ISD::UDIV, VT, Expand);
784 setOperationAction(ISD::SREM, VT, Expand);
785 setOperationAction(ISD::UREM, VT, Expand);
786 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
787 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
788 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
789 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
790 setOperationAction(ISD::FMA, VT, Expand);
791 setOperationAction(ISD::FFLOOR, VT, Expand);
792 setOperationAction(ISD::FCEIL, VT, Expand);
793 setOperationAction(ISD::FTRUNC, VT, Expand);
794 setOperationAction(ISD::FRINT, VT, Expand);
795 setOperationAction(ISD::FNEARBYINT, VT, Expand);
796 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
797 setOperationAction(ISD::MULHS, VT, Expand);
798 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
799 setOperationAction(ISD::MULHU, VT, Expand);
800 setOperationAction(ISD::SDIVREM, VT, Expand);
801 setOperationAction(ISD::UDIVREM, VT, Expand);
802 setOperationAction(ISD::CTPOP, VT, Expand);
803 setOperationAction(ISD::CTTZ, VT, Expand);
804 setOperationAction(ISD::CTLZ, VT, Expand);
805 setOperationAction(ISD::ROTL, VT, Expand);
806 setOperationAction(ISD::ROTR, VT, Expand);
807 setOperationAction(ISD::BSWAP, VT, Expand);
808 setOperationAction(ISD::SETCC, VT, Expand);
809 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
810 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
811 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
812 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
813 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
814 setOperationAction(ISD::TRUNCATE, VT, Expand);
815 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
816 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
817 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
818 setOperationAction(ISD::SELECT_CC, VT, Expand);
819 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
820 setTruncStoreAction(InnerVT, VT, Expand);
821
822 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
823 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
824
825 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
826 // types, we have to deal with them whether we ask for Expansion or not.
827 // Setting Expand causes its own optimisation problems though, so leave
828 // them legal.
829 if (VT.getVectorElementType() == MVT::i1)
830 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
831
832 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
833 // split/scalarized right now.
834 if (VT.getVectorElementType() == MVT::f16)
835 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
836 }
837 }
838
839 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
840 // with -msoft-float, disable use of MMX as well.
841 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
842 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
843 // No operations on x86mmx supported, everything uses intrinsics.
844 }
845
846 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
847 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
848 : &X86::VR128RegClass);
849
850 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
851 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
852 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
853 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
854 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
855 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
856 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
857 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
858
859 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
860 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
861
862 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
863 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
864 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
865 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
866 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
867 }
868
869 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
870 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
871 : &X86::VR128RegClass);
872
873 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
874 // registers cannot be used even for integer operations.
875 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
876 : &X86::VR128RegClass);
877 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
878 : &X86::VR128RegClass);
879 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
880 : &X86::VR128RegClass);
881 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
882 : &X86::VR128RegClass);
883
884 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
885 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
886 setOperationAction(ISD::SDIV, VT, Custom);
887 setOperationAction(ISD::SREM, VT, Custom);
888 setOperationAction(ISD::UDIV, VT, Custom);
889 setOperationAction(ISD::UREM, VT, Custom);
890 }
891
892 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
893 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
894 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
895
896 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
897 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
898 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
899 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
900 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
901 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
902 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
903 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
904 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
905 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
906 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
907 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
908 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
909
910 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
911 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
912 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
913 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
914 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
915 }
916
917 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
918 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
919 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
920 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
921 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
922 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
923 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
924 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
925 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
926 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
927 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
928 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
929
930 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
931 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
932 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
933
934 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
935 setOperationAction(ISD::SETCC, VT, Custom);
936 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
937 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
938 setOperationAction(ISD::CTPOP, VT, Custom);
939 setOperationAction(ISD::ABS, VT, Custom);
940
941 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
942 // setcc all the way to isel and prefer SETGT in some isel patterns.
943 setCondCodeAction(ISD::SETLT, VT, Custom);
944 setCondCodeAction(ISD::SETLE, VT, Custom);
945 }
946
947 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
948 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
949 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
950 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
951 setOperationAction(ISD::VSELECT, VT, Custom);
952 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
953 }
954
955 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
956 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
957 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
958 setOperationAction(ISD::VSELECT, VT, Custom);
959
960 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
961 continue;
962
963 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
964 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
965 }
966
967 // Custom lower v2i64 and v2f64 selects.
968 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
969 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
970 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
971 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
972 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
973
974 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
975 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
976 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
977 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
978
979 // Custom legalize these to avoid over promotion or custom promotion.
980 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
981 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
982 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
983 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
984 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
985 }
986
987 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
988 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
989 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
990 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
991
992 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
993 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
994
995 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
996 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
997
998 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
999 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1000 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1001 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1002 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1003
1004 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1005 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1006 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1007 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1008
1009 // We want to legalize this to an f64 load rather than an i64 load on
1010 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1011 // store.
1012 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1013 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1014 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1015 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1016 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1017 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1018
1019 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1020 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1021 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1022 if (!Subtarget.hasAVX512())
1023 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1024
1025 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1026 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1027 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1028
1029 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1030
1031 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1032 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1033 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1034 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1035 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1036 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1037
1038 // In the customized shift lowering, the legal v4i32/v2i64 cases
1039 // in AVX2 will be recognized.
1040 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1041 setOperationAction(ISD::SRL, VT, Custom);
1042 setOperationAction(ISD::SHL, VT, Custom);
1043 setOperationAction(ISD::SRA, VT, Custom);
1044 }
1045
1046 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1047 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1048
1049 // With 512-bit registers or AVX512VL+BW, expanding (and promoting the
1050 // shifts) is better.
1051 if (!Subtarget.useAVX512Regs() &&
1052 !(Subtarget.hasBWI() && Subtarget.hasVLX()))
1053 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1054
1055 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1056 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1057 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1058 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1059 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1060 }
1061
1062 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1063 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1064 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1065 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1066 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1067 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1068 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1069 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1070 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1071
1072 // These might be better off as horizontal vector ops.
1073 setOperationAction(ISD::ADD, MVT::i16, Custom);
1074 setOperationAction(ISD::ADD, MVT::i32, Custom);
1075 setOperationAction(ISD::SUB, MVT::i16, Custom);
1076 setOperationAction(ISD::SUB, MVT::i32, Custom);
1077 }
1078
1079 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1080 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1081 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1082 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1083 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1084 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1085 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1086 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1087 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1088 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1089 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1090 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1091 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1092 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1093
1094 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1095 }
1096
1097 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1098 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1099 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1100 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1101 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1102 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1103 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1104 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1105
1106 // FIXME: Do we need to handle scalar-to-vector here?
1107 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1108
1109 // We directly match byte blends in the backend as they match the VSELECT
1110 // condition form.
1111 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1112
1113 // SSE41 brings specific instructions for doing vector sign extend even in
1114 // cases where we don't have SRA.
1115 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1116 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1117 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1118 }
1119
1120 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1121 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1122 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1123 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1124 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1125 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1126 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1127 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1128 }
1129
1130 // i8 vectors are custom because the source register and source
1131 // memory operand types are not the same width.
1132 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1133
1134 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1135 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1136 // do the pre and post work in the vector domain.
1137 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1138 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1139 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1140 // so that DAG combine doesn't try to turn it into uint_to_fp.
1141 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1142 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1143 }
1144 }
1145
1146 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1147 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1148 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1149 setOperationAction(ISD::ROTL, VT, Custom);
1150
1151 // XOP can efficiently perform BITREVERSE with VPPERM.
1152 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1153 setOperationAction(ISD::BITREVERSE, VT, Custom);
1154
1155 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1156 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1157 setOperationAction(ISD::BITREVERSE, VT, Custom);
1158 }
1159
1160 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1161 bool HasInt256 = Subtarget.hasInt256();
1162
1163 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1164 : &X86::VR256RegClass);
1165 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1166 : &X86::VR256RegClass);
1167 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1168 : &X86::VR256RegClass);
1169 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1170 : &X86::VR256RegClass);
1171 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1172 : &X86::VR256RegClass);
1173 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1174 : &X86::VR256RegClass);
1175
1176 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1177 setOperationAction(ISD::FFLOOR, VT, Legal);
1178 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1179 setOperationAction(ISD::FCEIL, VT, Legal);
1180 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1181 setOperationAction(ISD::FTRUNC, VT, Legal);
1182 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1183 setOperationAction(ISD::FRINT, VT, Legal);
1184 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1185 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1186 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1187 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1188 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1189
1190 setOperationAction(ISD::FROUND, VT, Custom);
1191
1192 setOperationAction(ISD::FNEG, VT, Custom);
1193 setOperationAction(ISD::FABS, VT, Custom);
1194 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1195 }
1196
1197 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1198 // even though v8i16 is a legal type.
1199 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1200 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1201 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1202 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1203 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1204 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
1205
1206 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1207 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1208
1209 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1210 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1211 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1212 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1213 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1214 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1215 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1216 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1217 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1218 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1219 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1220 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1221
1222 if (!Subtarget.hasAVX512())
1223 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1224
1225 // In the customized shift lowering, the legal v8i32/v4i64 cases
1226 // in AVX2 will be recognized.
1227 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1228 setOperationAction(ISD::SRL, VT, Custom);
1229 setOperationAction(ISD::SHL, VT, Custom);
1230 setOperationAction(ISD::SRA, VT, Custom);
1231 }
1232
1233 // These types need custom splitting if their input is a 128-bit vector.
1234 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1235 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1236 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1237 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1238
1239 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1240 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1241
1242 // With BWI, expanding (and promoting the shifts) is the better option.
1243 if (!Subtarget.useBWIRegs())
1244 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1245
1246 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1247 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1248 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1249 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1250 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1251 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1252
1253 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1254 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1255 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1256 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1257 }
1258
1259 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1260 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1261 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1262 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1263
1264 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1265 setOperationAction(ISD::SETCC, VT, Custom);
1266 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1267 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1268 setOperationAction(ISD::CTPOP, VT, Custom);
1269 setOperationAction(ISD::CTLZ, VT, Custom);
1270
1271 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1272 // setcc all the way to isel and prefer SETGT in some isel patterns.
1273 setCondCodeAction(ISD::SETLT, VT, Custom);
1274 setCondCodeAction(ISD::SETLE, VT, Custom);
1275 }
1276
1277 if (Subtarget.hasAnyFMA()) {
1278 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1279 MVT::v2f64, MVT::v4f64 }) {
1280 setOperationAction(ISD::FMA, VT, Legal);
1281 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1282 }
1283 }
1284
1285 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1286 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1287 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1288 }
1289
1290 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1291 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1292 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1293 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1294
1295 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1296 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1297 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1298 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1299 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1300 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1301
1302 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1303 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1304 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1305 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1306 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1307
1308 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1309 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1310 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1311 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1312 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1313 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1314 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1315 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1316
1317 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1318 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1319 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1320 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1321 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1322 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1323 }
1324
1325 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1326 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1327 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1328 }
1329
1330 if (HasInt256) {
1331 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1332 // when we have a 256-bit-wide blend with immediate.
1333 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1334 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1335
1336 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1337 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1338 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1339 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1340 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1341 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1342 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1343 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1344 }
1345 }
1346
1347 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1348 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1349 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1350 setOperationAction(ISD::MSTORE, VT, Legal);
1351 }
1352
1353 // Extract subvector is special because the value type
1354 // (result) is 128-bit but the source is 256-bit wide.
1355 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1356 MVT::v4f32, MVT::v2f64 }) {
1357 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1358 }
1359
1360 // Custom lower several nodes for 256-bit types.
1361 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1362 MVT::v8f32, MVT::v4f64 }) {
1363 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1364 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1365 setOperationAction(ISD::VSELECT, VT, Custom);
1366 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1367 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1368 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1369 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1370 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1371 setOperationAction(ISD::STORE, VT, Custom);
1372 }
1373
1374 if (HasInt256) {
1375 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1376
1377 // Custom legalize 2x32 to get a little better code.
1378 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1379 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1380
1381 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1382 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1383 setOperationAction(ISD::MGATHER, VT, Custom);
1384 }
1385 }
1386
1387 // This block controls legalization of the mask vector sizes that are
1388 // available with AVX512. 512-bit vectors are in a separate block controlled
1389 // by useAVX512Regs.
1390 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1391 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1392 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1393 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1394 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1395 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1396
1397 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1398 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1399 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1400
1401 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1402 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1403 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1404 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1405 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1406 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1407 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1408 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1409 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1410 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1411 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1412 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1413
1414 // There is no byte sized k-register load or store without AVX512DQ.
1415 if (!Subtarget.hasDQI()) {
1416 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1417 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1418 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1419 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1420
1421 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1422 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1423 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1424 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1425 }
1426
1427 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1428 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1429 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1430 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1431 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1432 }
1433
1434 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1435 setOperationAction(ISD::ADD, VT, Custom);
1436 setOperationAction(ISD::SUB, VT, Custom);
1437 setOperationAction(ISD::MUL, VT, Custom);
1438 setOperationAction(ISD::UADDSAT, VT, Custom);
1439 setOperationAction(ISD::SADDSAT, VT, Custom);
1440 setOperationAction(ISD::USUBSAT, VT, Custom);
1441 setOperationAction(ISD::SSUBSAT, VT, Custom);
1442 setOperationAction(ISD::VSELECT, VT, Expand);
1443 }
1444
1445 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1446 setOperationAction(ISD::SETCC, VT, Custom);
1447 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1448 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1449 setOperationAction(ISD::SELECT, VT, Custom);
1450 setOperationAction(ISD::TRUNCATE, VT, Custom);
1451
1452 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1453 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1454 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1455 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1456 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1457 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1458 }
1459
1460 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1461 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1462 }
1463
1464 // This block controls legalization for 512-bit operations with 32/64 bit
1465 // elements. 512-bits can be disabled based on prefer-vector-width and
1466 // required-vector-width function attributes.
1467 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1468 bool HasBWI = Subtarget.hasBWI();
1469
1470 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1471 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1472 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1473 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1474 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1475 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1476
1477 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1478 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1479 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1480 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1481 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1482 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1483 if (HasBWI)
1484 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1485 }
1486
1487 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1488 setOperationAction(ISD::FNEG, VT, Custom);
1489 setOperationAction(ISD::FABS, VT, Custom);
1490 setOperationAction(ISD::FMA, VT, Legal);
1491 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1492 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1493 }
1494
1495 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1496 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1497 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1498 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1499 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1500 }
1501 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1502 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1503 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1504 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1505 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1506 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1507 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1508 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1509
1510 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1511 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1512 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1513 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1514 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1515 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1516 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1517 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1518 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1519 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1520 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1521 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1522
1523 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1524 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1525 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1526 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1527 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1528 if (HasBWI)
1529 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1530
1531 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1532 // to 512-bit rather than use the AVX2 instructions so that we can use
1533 // k-masks.
1534 if (!Subtarget.hasVLX()) {
1535 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1536 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1537 setOperationAction(ISD::MLOAD, VT, Custom);
1538 setOperationAction(ISD::MSTORE, VT, Custom);
1539 }
1540 }
1541
1542 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1543 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1544 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1545 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1546 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1547 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1548 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1549 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1550 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1551 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1552 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1553 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1554 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1555
1556 if (HasBWI) {
1557 // Extends from v64i1 masks to 512-bit vectors.
1558 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1559 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1560 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1561 }
1562
1563 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1564 setOperationAction(ISD::FFLOOR, VT, Legal);
1565 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1566 setOperationAction(ISD::FCEIL, VT, Legal);
1567 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1568 setOperationAction(ISD::FTRUNC, VT, Legal);
1569 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1570 setOperationAction(ISD::FRINT, VT, Legal);
1571 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1572 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1573 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1574 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1575 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1576
1577 setOperationAction(ISD::FROUND, VT, Custom);
1578 }
1579
1580 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1581 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1582 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1583 }
1584
1585 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1586 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1587 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1588 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1589
1590 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1591 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1592 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1593 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1594
1595 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1596 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1597 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1598 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1599 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1600 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1601
1602 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1603
1604 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1605 setOperationAction(ISD::SRL, VT, Custom);
1606 setOperationAction(ISD::SHL, VT, Custom);
1607 setOperationAction(ISD::SRA, VT, Custom);
1608 setOperationAction(ISD::SETCC, VT, Custom);
1609
1610 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1611 // setcc all the way to isel and prefer SETGT in some isel patterns.
1612 setCondCodeAction(ISD::SETLT, VT, Custom);
1613 setCondCodeAction(ISD::SETLE, VT, Custom);
1614 }
1615 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1616 setOperationAction(ISD::SMAX, VT, Legal);
1617 setOperationAction(ISD::UMAX, VT, Legal);
1618 setOperationAction(ISD::SMIN, VT, Legal);
1619 setOperationAction(ISD::UMIN, VT, Legal);
1620 setOperationAction(ISD::ABS, VT, Legal);
1621 setOperationAction(ISD::CTPOP, VT, Custom);
1622 setOperationAction(ISD::ROTL, VT, Custom);
1623 setOperationAction(ISD::ROTR, VT, Custom);
1624 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1625 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1626 }
1627
1628 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1629 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1630 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1631 setOperationAction(ISD::CTLZ, VT, Custom);
1632 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1633 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1634 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1635 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1636 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1637 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1638 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1639 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1640 }
1641
1642 if (Subtarget.hasDQI()) {
1643 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1644 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1645 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1646 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1647 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1648 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1649 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1650 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1651
1652 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1653 }
1654
1655 if (Subtarget.hasCDI()) {
1656 // Non-VLX subtargets extend 128/256-bit vectors to use the 512-bit version.
1657 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1658 setOperationAction(ISD::CTLZ, VT, Legal);
1659 }
1660 } // Subtarget.hasCDI()
1661
1662 if (Subtarget.hasVPOPCNTDQ()) {
1663 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1664 setOperationAction(ISD::CTPOP, VT, Legal);
1665 }
1666
1667 // Extract subvector is special because the value type
1668 // (result) is 256-bit but the source is 512-bit wide.
1669 // 128-bit was made Legal under AVX1.
1670 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1671 MVT::v8f32, MVT::v4f64 })
1672 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1673
1674 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1675 MVT::v16f32, MVT::v8f64 }) {
1676 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1677 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1678 setOperationAction(ISD::SELECT, VT, Custom);
1679 setOperationAction(ISD::VSELECT, VT, Custom);
1680 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1681 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1682 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1683 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1684 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1685 }
1686
1687 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1688 setOperationAction(ISD::MLOAD, VT, Legal);
1689 setOperationAction(ISD::MSTORE, VT, Legal);
1690 setOperationAction(ISD::MGATHER, VT, Custom);
1691 setOperationAction(ISD::MSCATTER, VT, Custom);
1692 }
1693 if (HasBWI) {
1694 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1695 setOperationAction(ISD::MLOAD, VT, Legal);
1696 setOperationAction(ISD::MSTORE, VT, Legal);
1697 }
1698 } else {
1699 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1700 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1701 }
1702
1703 if (Subtarget.hasVBMI2()) {
1704 for (auto VT : { MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1705 setOperationAction(ISD::FSHL, VT, Custom);
1706 setOperationAction(ISD::FSHR, VT, Custom);
1707 }
1708 }
1709 }// useAVX512Regs
1710
1711 // This block controls legalization for operations that don't have
1712 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1713 // narrower widths.
1714 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1715 // These operations are handled on non-VLX by artificially widening in
1716 // isel patterns.
1717
1718 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1719 Subtarget.hasVLX() ? Legal : Custom);
1720 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1721 Subtarget.hasVLX() ? Legal : Custom);
1722 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1723 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1724 Subtarget.hasVLX() ? Legal : Custom);
1725 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1726 Subtarget.hasVLX() ? Legal : Custom);
1727 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1728 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1729 Subtarget.hasVLX() ? Legal : Custom);
1730 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1731 Subtarget.hasVLX() ? Legal : Custom);
1732 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1733 Subtarget.hasVLX() ? Legal : Custom);
1734 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1735 Subtarget.hasVLX() ? Legal : Custom);
1736
1737 if (Subtarget.hasDQI()) {
1738 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1739 // v2f32 UINT_TO_FP is already custom under SSE2.
1740 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
1741        isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
1742        "Unexpected operation action!");
1743 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1744 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1745 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1746 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1747 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1748 }
1749
1750 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1751 setOperationAction(ISD::SMAX, VT, Legal);
1752 setOperationAction(ISD::UMAX, VT, Legal);
1753 setOperationAction(ISD::SMIN, VT, Legal);
1754 setOperationAction(ISD::UMIN, VT, Legal);
1755 setOperationAction(ISD::ABS, VT, Legal);
1756 }
1757
1758 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1759 setOperationAction(ISD::ROTL, VT, Custom);
1760 setOperationAction(ISD::ROTR, VT, Custom);
1761 }
1762
1763 // Custom legalize 2x32 to get a little better code.
1764 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1765 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1766
1767 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1768 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1769 setOperationAction(ISD::MSCATTER, VT, Custom);
1770
1771 if (Subtarget.hasDQI()) {
1772 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1773 setOperationAction(ISD::SINT_TO_FP, VT,
1774 Subtarget.hasVLX() ? Legal : Custom);
1775 setOperationAction(ISD::UINT_TO_FP, VT,
1776 Subtarget.hasVLX() ? Legal : Custom);
1777 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1778 Subtarget.hasVLX() ? Legal : Custom);
1779 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1780 Subtarget.hasVLX() ? Legal : Custom);
1781 setOperationAction(ISD::FP_TO_SINT, VT,
1782 Subtarget.hasVLX() ? Legal : Custom);
1783 setOperationAction(ISD::FP_TO_UINT, VT,
1784 Subtarget.hasVLX() ? Legal : Custom);
1785 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1786 Subtarget.hasVLX() ? Legal : Custom);
1787 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1788 Subtarget.hasVLX() ? Legal : Custom);
1789 setOperationAction(ISD::MUL, VT, Legal);
1790 }
1791 }
1792
1793 if (Subtarget.hasCDI()) {
1794 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1795 setOperationAction(ISD::CTLZ, VT, Legal);
1796 }
1797 } // Subtarget.hasCDI()
1798
1799 if (Subtarget.hasVPOPCNTDQ()) {
1800 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1801 setOperationAction(ISD::CTPOP, VT, Legal);
1802 }
1803 }
1804
1805 // This block controls legalization of v32i1/v64i1, which are available with
1806 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1807 // useBWIRegs.
1808 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1809 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1810 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1811
1812 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1813 setOperationAction(ISD::ADD, VT, Custom);
1814 setOperationAction(ISD::SUB, VT, Custom);
1815 setOperationAction(ISD::MUL, VT, Custom);
1816 setOperationAction(ISD::VSELECT, VT, Expand);
1817 setOperationAction(ISD::UADDSAT, VT, Custom);
1818 setOperationAction(ISD::SADDSAT, VT, Custom);
1819 setOperationAction(ISD::USUBSAT, VT, Custom);
1820 setOperationAction(ISD::SSUBSAT, VT, Custom);
1821
1822 setOperationAction(ISD::TRUNCATE, VT, Custom);
1823 setOperationAction(ISD::SETCC, VT, Custom);
1824 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1825 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1826 setOperationAction(ISD::SELECT, VT, Custom);
1827 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1828 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1829 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1830 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1831 }
1832
1833 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1834 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1835
1836 // Extends from v32i1 masks to 256-bit vectors.
1837 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1838 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1839 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1840
1841 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1842 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1843 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1844 }
1845
1846 // These operations are handled on non-VLX by artificially widening in
1847 // isel patterns.
1848 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1849
1850 if (Subtarget.hasBITALG()) {
1851 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1852 setOperationAction(ISD::CTPOP, VT, Legal);
1853 }
1854 }
1855
1856 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1857 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1858 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1859 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1860 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1861 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1862
1863 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1864 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1865 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1866 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1867 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1868
1869 if (Subtarget.hasBWI()) {
1870 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1871 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1872 }
1873
1874 if (Subtarget.hasVBMI2()) {
1875 // TODO: Make these legal even without VLX?
1876 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1877 MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1878 setOperationAction(ISD::FSHL, VT, Custom);
1879 setOperationAction(ISD::FSHR, VT, Custom);
1880 }
1881 }
1882
1883 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
1884 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
1885 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1886 }
1887
1888 // We want to custom lower some of our intrinsics.
1889 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1890 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1891 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1892 if (!Subtarget.is64Bit()) {
1893 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1894 }
1895
1896 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1897 // handle type legalization for these operations here.
1898 //
1899 // FIXME: We really should do custom legalization for addition and
1900 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1901 // than generic legalization for 64-bit multiplication-with-overflow, though.
1902 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1903 if (VT == MVT::i64 && !Subtarget.is64Bit())
1904 continue;
1905 // Add/Sub/Mul with overflow operations are custom lowered.
1906 setOperationAction(ISD::SADDO, VT, Custom);
1907 setOperationAction(ISD::UADDO, VT, Custom);
1908 setOperationAction(ISD::SSUBO, VT, Custom);
1909 setOperationAction(ISD::USUBO, VT, Custom);
1910 setOperationAction(ISD::SMULO, VT, Custom);
1911 setOperationAction(ISD::UMULO, VT, Custom);
1912
1913 // Support carry in as value rather than glue.
1914 setOperationAction(ISD::ADDCARRY, VT, Custom);
1915 setOperationAction(ISD::SUBCARRY, VT, Custom);
1916 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1917 }
1918
1919 if (!Subtarget.is64Bit()) {
1920 // These libcalls are not available on 32-bit targets.
1921 setLibcallName(RTLIB::SHL_I128, nullptr);
1922 setLibcallName(RTLIB::SRL_I128, nullptr);
1923 setLibcallName(RTLIB::SRA_I128, nullptr);
1924 setLibcallName(RTLIB::MUL_I128, nullptr);
1925 }
1926
1927 // Combine sin / cos into _sincos_stret if it is available.
1928 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1929 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1930 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1931 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1932 }
1933
1934 if (Subtarget.isTargetWin64()) {
1935 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1936 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1937 setOperationAction(ISD::SREM, MVT::i128, Custom);
1938 setOperationAction(ISD::UREM, MVT::i128, Custom);
1939 }
1940
1941 // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1942 // is. We should promote the value to 64-bits to solve this.
1943 // This is what the CRT headers do - `fmodf` is an inline header
1944 // function casting to f64 and calling `fmod`.
1945 if (Subtarget.is32Bit() &&
1946 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
1947 for (ISD::NodeType Op :
1948 {ISD::FCEIL, ISD::STRICT_FCEIL,
1949 ISD::FCOS, ISD::STRICT_FCOS,
1950 ISD::FEXP, ISD::STRICT_FEXP,
1951 ISD::FFLOOR, ISD::STRICT_FFLOOR,
1952 ISD::FREM, ISD::STRICT_FREM,
1953 ISD::FLOG, ISD::STRICT_FLOG,
1954 ISD::FLOG10, ISD::STRICT_FLOG10,
1955 ISD::FPOW, ISD::STRICT_FPOW,
1956 ISD::FSIN, ISD::STRICT_FSIN})
1957 if (isOperationExpand(Op, MVT::f32))
1958 setOperationAction(Op, MVT::f32, Promote);
1959
1960 // We have target-specific dag combine patterns for the following nodes:
1961 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1962 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1963 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
1964 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1965 setTargetDAGCombine(ISD::CONCAT_VECTORS);
1966 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1967 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1968 setTargetDAGCombine(ISD::BITCAST);
1969 setTargetDAGCombine(ISD::VSELECT);
1970 setTargetDAGCombine(ISD::SELECT);
1971 setTargetDAGCombine(ISD::SHL);
1972 setTargetDAGCombine(ISD::SRA);
1973 setTargetDAGCombine(ISD::SRL);
1974 setTargetDAGCombine(ISD::OR);
1975 setTargetDAGCombine(ISD::AND);
1976 setTargetDAGCombine(ISD::ADD);
1977 setTargetDAGCombine(ISD::FADD);
1978 setTargetDAGCombine(ISD::FSUB);
1979 setTargetDAGCombine(ISD::FNEG);
1980 setTargetDAGCombine(ISD::FMA);
1981 setTargetDAGCombine(ISD::STRICT_FMA);
1982 setTargetDAGCombine(ISD::FMINNUM);
1983 setTargetDAGCombine(ISD::FMAXNUM);
1984 setTargetDAGCombine(ISD::SUB);
1985 setTargetDAGCombine(ISD::LOAD);
1986 setTargetDAGCombine(ISD::MLOAD);
1987 setTargetDAGCombine(ISD::STORE);
1988 setTargetDAGCombine(ISD::MSTORE);
1989 setTargetDAGCombine(ISD::TRUNCATE);
1990 setTargetDAGCombine(ISD::ZERO_EXTEND);
1991 setTargetDAGCombine(ISD::ANY_EXTEND);
1992 setTargetDAGCombine(ISD::SIGN_EXTEND);
1993 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1994 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
1995 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1996 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1997 setTargetDAGCombine(ISD::SINT_TO_FP);
1998 setTargetDAGCombine(ISD::UINT_TO_FP);
1999 setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
2000 setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
2001 setTargetDAGCombine(ISD::SETCC);
2002 setTargetDAGCombine(ISD::MUL);
2003 setTargetDAGCombine(ISD::XOR);
2004 setTargetDAGCombine(ISD::MSCATTER);
2005 setTargetDAGCombine(ISD::MGATHER);
2006 setTargetDAGCombine(ISD::FP16_TO_FP);
2007 setTargetDAGCombine(ISD::FP_EXTEND);
2008 setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
2009 setTargetDAGCombine(ISD::FP_ROUND);
2010
2011 computeRegisterProperties(Subtarget.getRegisterInfo());
2012
2013 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2014 MaxStoresPerMemsetOptSize = 8;
2015 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2016 MaxStoresPerMemcpyOptSize = 4;
2017 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2018 MaxStoresPerMemmoveOptSize = 4;
2019
2020 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2021 // that needs to be benchmarked and balanced with the potential use of vector
2022 // load/store types (PR33329, PR33914).
2023 MaxLoadsPerMemcmp = 2;
2024 MaxLoadsPerMemcmpOptSize = 2;
2025
2026 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
2027 setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
2028
2029 // An out-of-order CPU can speculatively execute past a predictable branch,
2030 // but a conditional move could be stalled by an expensive earlier operation.
2031 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2032 EnableExtLdPromotion = true;
2033 setPrefFunctionAlignment(Align(16));
2034
2035 verifyIntrinsicTables();
2036
2037 // Default to having -disable-strictnode-mutation on
2038 IsStrictFPEnabled = true;
2039}
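// --- Editorial sketch (not part of the original X86ISelLowering.cpp) --------
// The constructor above is essentially populating per-(opcode, value-type)
// action tables that the SelectionDAG legalizer later consults. A minimal,
// self-contained analogue of that idea (hypothetical names and types, not
// LLVM's real data structures) might look like this:
#include <cstdint>
#include <map>
#include <utility>

namespace sketch {
enum class Action : uint8_t { Legal, Promote, Expand, Custom };

class ActionTable {
  // Keyed by (opcode, simple value type id); unset entries default to Legal.
  std::map<std::pair<unsigned, unsigned>, Action> Table;

public:
  void setOperationAction(unsigned Opc, unsigned VT, Action A) {
    Table[{Opc, VT}] = A;
  }
  Action getOperationAction(unsigned Opc, unsigned VT) const {
    auto It = Table.find({Opc, VT});
    return It == Table.end() ? Action::Legal : It->second;
  }
};
} // namespace sketch
// Usage (illustrative, with made-up numeric ids): a subtarget without AVX2
// could record
//   T.setOperationAction(/*MUL*/ 42, /*v8i32*/ 7, sketch::Action::Custom);
// and the legalizer would branch on getOperationAction() when it visits that
// node, much as the real tables above steer the Custom/Expand/Promote paths.
// ----------------------------------------------------------------------------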
2040
2041// This has so far only been implemented for 64-bit MachO.
2042bool X86TargetLowering::useLoadStackGuardNode() const {
2043 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2044}
2045
2046bool X86TargetLowering::useStackGuardXorFP() const {
2047 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2048 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2049}
2050
2051SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2052 const SDLoc &DL) const {
2053 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2054 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2055 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2056 return SDValue(Node, 0);
2057}
2058
2059TargetLoweringBase::LegalizeTypeAction
2060X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2061 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2062 !Subtarget.hasBWI())
2063 return TypeSplitVector;
2064
2065 if (VT.getVectorNumElements() != 1 &&
2066 VT.getVectorElementType() != MVT::i1)
2067 return TypeWidenVector;
2068
2069 return TargetLoweringBase::getPreferredVectorAction(VT);
2070}
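// --- Editorial sketch (not part of the original X86ISelLowering.cpp) --------
// The rule implemented by getPreferredVectorAction() above, restated as a
// tiny standalone decision function over a simplified feature/type model
// (hypothetical types, for illustration only):
namespace sketch {
enum class VecAction { SplitVector, WidenVector, Default };

struct Features { bool HasAVX512, HasBWI; };

inline VecAction preferredVectorAction(unsigned NumElts, bool ElemIsI1,
                                       const Features &F) {
  // v32i1/v64i1 with AVX512 but no BWI: split, since there is no mask
  // register wide enough to widen into.
  if (ElemIsI1 && (NumElts == 32 || NumElts == 64) && F.HasAVX512 && !F.HasBWI)
    return VecAction::SplitVector;
  // Non-i1 vectors with more than one element are widened to a legal width.
  if (NumElts != 1 && !ElemIsI1)
    return VecAction::WidenVector;
  return VecAction::Default; // fall back to the generic policy
}
} // namespace sketch
// ----------------------------------------------------------------------------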
2071
2072static std::pair<MVT, unsigned>
2073handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2074 const X86Subtarget &Subtarget) {
2075 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2076 // convention is one that uses k registers.
2077 if (NumElts == 2)
2078 return {MVT::v2i64, 1};
2079 if (NumElts == 4)
2080 return {MVT::v4i32, 1};
2081 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2082 CC != CallingConv::Intel_OCL_BI)
2083 return {MVT::v8i16, 1};
2084 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2085 CC != CallingConv::Intel_OCL_BI)
2086 return {MVT::v16i8, 1};
2087 // v32i1 passes in ymm unless we have BWI and the calling convention is
2088 // regcall.
2089 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2090 return {MVT::v32i8, 1};
2091 // Split v64i1 vectors if we don't have v64i8 available.
2092 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2093 if (Subtarget.useAVX512Regs())
2094 return {MVT::v64i8, 1};
2095 return {MVT::v32i8, 2};
2096 }
2097
2098 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2099 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2100 NumElts > 64)
2101 return {MVT::i8, NumElts};
2102
2103 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2104}
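// --- Editorial note (not part of the original X86ISelLowering.cpp) -----------
// Illustrative results of handleMaskRegisterForCallingConv() for the plain C
// calling convention (so neither X86_RegCall nor Intel_OCL_BI applies):
//   NumElts ==  2                        -> { v2i64,  1 }    (one xmm)
//   NumElts ==  4                        -> { v4i32,  1 }    (one xmm)
//   NumElts ==  8                        -> { v8i16,  1 }    (one xmm)
//   NumElts == 16                        -> { v16i8,  1 }    (one xmm)
//   NumElts == 32                        -> { v32i8,  1 }    (one ymm)
//   NumElts == 64, BWI, 512-bit regs     -> { v64i8,  1 }    (one zmm)
//   NumElts == 64, BWI, no 512-bit regs  -> { v32i8,  2 }    (two ymm)
//   NumElts ==  3 / 64-without-BWI / >64 -> { i8, NumElts }  (scalarized)
// Anything else falls through to {INVALID_SIMPLE_VALUE_TYPE, 0}, and the
// callers below then defer to the generic TargetLowering handling.
// -----------------------------------------------------------------------------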
2105
2106MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2107 CallingConv::ID CC,
2108 EVT VT) const {
2109 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2110 Subtarget.hasAVX512()) {
2111 unsigned NumElts = VT.getVectorNumElements();
2112
2113 MVT RegisterVT;
2114 unsigned NumRegisters;
2115 std::tie(RegisterVT, NumRegisters) =
2116 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2117 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2118 return RegisterVT;
2119 }
2120
2121 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2122}
2123
2124unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2125 CallingConv::ID CC,
2126 EVT VT) const {
2127 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2128 Subtarget.hasAVX512()) {
2129 unsigned NumElts = VT.getVectorNumElements();
2130
2131 MVT RegisterVT;
2132 unsigned NumRegisters;
2133 std::tie(RegisterVT, NumRegisters) =
2134 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2135 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2136 return NumRegisters;
2137 }
2138
2139 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2140}
2141
2142unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2143 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2144 unsigned &NumIntermediates, MVT &RegisterVT) const {
2145 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2146 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2147 Subtarget.hasAVX512() &&
2148 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2149 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2150 VT.getVectorNumElements() > 64)) {
2151 RegisterVT = MVT::i8;
2152 IntermediateVT = MVT::i1;
2153 NumIntermediates = VT.getVectorNumElements();
2154 return NumIntermediates;
2155 }
2156
2157 // Split v64i1 vectors if we don't have v64i8 available.
2158 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2159 CC != CallingConv::X86_RegCall) {
2160 RegisterVT = MVT::v32i8;
2161 IntermediateVT = MVT::v32i1;
2162 NumIntermediates = 2;
2163 return 2;
2164 }
2165
2166 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2167 NumIntermediates, RegisterVT);
2168}
2169
2170EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2171 LLVMContext& Context,
2172 EVT VT) const {
2173 if (!VT.isVector())
2174 return MVT::i8;
2175
2176 if (Subtarget.hasAVX512()) {
2177 const unsigned NumElts = VT.getVectorNumElements();
2178
2179 // Figure out what this type will be legalized to.
2180 EVT LegalVT = VT;
2181 while (getTypeAction(Context, LegalVT) != TypeLegal)
2182 LegalVT = getTypeToTransformTo(Context, LegalVT);
2183
2184 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2185 if (LegalVT.getSimpleVT().is512BitVector())
2186 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2187
2188 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2189 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2190 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2191 // vXi16/vXi8.
2192 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2193 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2194 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2195 }
2196 }
2197
2198 return VT.changeVectorElementTypeToInteger();
2199}
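// --- Editorial note (not part of the original X86ISelLowering.cpp) -----------
// Worked examples of getSetCCResultType() under AVX512 (illustrative):
//   setcc on v16f32, 512-bit registers enabled            -> v16i1
//   setcc on v8i32 with VLX                               -> v8i1
//   setcc on v16i8 with VLX but no BWI (8-bit elements)   -> v16i8
//     (falls through to changeVectorElementTypeToInteger()).
// Without AVX512, vector compares simply produce the matching integer vector
// type, and scalar compares produce i8.
// -----------------------------------------------------------------------------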
2200
2201/// Helper for getByValTypeAlignment to determine
2202/// the desired ByVal argument alignment.
2203static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2204 if (MaxAlign == 16)
2205 return;
2206 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2207 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2208 MaxAlign = Align(16);
2209 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2210 Align EltAlign;
2211 getMaxByValAlign(ATy->getElementType(), EltAlign);
2212 if (EltAlign > MaxAlign)
2213 MaxAlign = EltAlign;
2214 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2215 for (auto *EltTy : STy->elements()) {
2216 Align EltAlign;
2217 getMaxByValAlign(EltTy, EltAlign);
2218 if (EltAlign > MaxAlign)
2219 MaxAlign = EltAlign;
2220 if (MaxAlign == 16)
2221 break;
2222 }
2223 }
2224}
2225
2226/// Return the desired alignment for ByVal aggregate
2227/// function arguments in the caller parameter area. For X86, aggregates
2228/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2229/// are at 4-byte boundaries.
2230unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2231 const DataLayout &DL) const {
2232 if (Subtarget.is64Bit()) {
2233 // Max of 8 and alignment of type.
2234 Align TyAlign = DL.getABITypeAlign(Ty);
2235 if (TyAlign > 8)
2236 return TyAlign.value();
2237 return 8;
2238 }
2239
2240 Align Alignment(4);
2241 if (Subtarget.hasSSE1())
2242 getMaxByValAlign(Ty, Alignment);
2243 return Alignment.value();
2244}
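// --- Editorial note (not part of the original X86ISelLowering.cpp) -----------
// Worked examples of the byval alignment rules above (illustrative):
//   x86-64:   struct { int a; }           -> max(8, ABI align 4)  = 8
//             struct { long double x; }   -> max(8, ABI align 16) = 16
//   x86-32 with SSE1:
//             struct { __m128 v; int a; } -> 16 (contains a 128-bit vector)
//             struct { int a; int b; }    -> 4  (default 4-byte boundary)
// getMaxByValAlign() is what detects the nested 128-bit vector in the third
// case by recursing through array and struct element types.
// -----------------------------------------------------------------------------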
2245
2246/// It returns EVT::Other if the type should be determined using generic
2247/// target-independent logic.
2248/// For vector ops we check that the overall size isn't larger than our
2249/// preferred vector width.
2250EVT X86TargetLowering::getOptimalMemOpType(
2251 const MemOp &Op, const AttributeList &FuncAttributes) const {
2252 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2253 if (Op.size() >= 16 &&
2254 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2255 // FIXME: Check if unaligned 64-byte accesses are slow.
2256 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2257 (Subtarget.getPreferVectorWidth() >= 512)) {
2258 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2259 }
2260 // FIXME: Check if unaligned 32-byte accesses are slow.
2261 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2262 (Subtarget.getPreferVectorWidth() >= 256)) {
2263 // Although this isn't a well-supported type for AVX1, we'll let
2264 // legalization and shuffle lowering produce the optimal codegen. If we
2265 // choose an optimal type with a vector element larger than a byte,
2266 // getMemsetStores() may create an intermediate splat (using an integer
2267 // multiply) before we splat as a vector.
2268 return MVT::v32i8;
2269 }
2270 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2271 return MVT::v16i8;
2272 // TODO: Can SSE1 handle a byte vector?
2273 // If we have SSE1 registers we should be able to use them.
2274 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2275 (Subtarget.getPreferVectorWidth() >= 128))
2276 return MVT::v4f32;
2277 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2278 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2279 // Do not use f64 to lower memcpy if source is string constant. It's
2280 // better to use i32 to avoid the loads.
2281 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2282 // The gymnastics of splatting a byte value into an XMM register and then
2283 // only using 8-byte stores (because this is a CPU with slow unaligned
2284 // 16-byte accesses) makes that a loser.
2285 return MVT::f64;
2286 }
2287 }
2288 // This is a compromise. If we reach here, unaligned accesses may be slow on
2289 // this target. However, creating smaller, aligned accesses could be even
2290 // slower and would certainly be a lot more code.
2291 if (Subtarget.is64Bit() && Op.size() >= 8)
2292 return MVT::i64;
2293 return MVT::i32;
2294}
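// --- Editorial note (not part of the original X86ISelLowering.cpp) -----------
// The size/feature decision above, summarised (illustrative, assuming the op
// may use FP/vector registers, i.e. the function is not NoImplicitFloat):
//   size >= 64, AVX-512, prefer-width >= 512 -> v64i8 (with BWI) or v16i32
//   size >= 32, AVX,     prefer-width >= 256 -> v32i8
//   size >= 16, SSE2,    prefer-width >= 128 -> v16i8
//   size >= 16, SSE1 only                    -> v4f32
//   size >= 8 on 32-bit with SSE2, and the op is a zero-memset or a
//     non-string memcpy                      -> f64
//   otherwise                                -> i64 (64-bit) or i32
// The >= 16 rows additionally require an aligned op or a subtarget where
// unaligned 16-byte accesses are not slow.
// -----------------------------------------------------------------------------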
2295
2296bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2297 if (VT == MVT::f32)
2298 return X86ScalarSSEf32;
2299 else if (VT == MVT::f64)
2300 return X86ScalarSSEf64;
2301 return true;
2302}
2303
2304bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2305 EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags,
2306 bool *Fast) const {
2307 if (Fast) {
2308 switch (VT.getSizeInBits()) {
2309 default:
2310 // 8-byte and under are always assumed to be fast.
2311 *Fast = true;
2312 break;
2313 case 128:
2314 *Fast = !Subtarget.isUnalignedMem16Slow();
2315 break;
2316 case 256:
2317 *Fast = !Subtarget.isUnalignedMem32Slow();
2318 break;
2319 // TODO: What about AVX-512 (512-bit) accesses?
2320 }
2321 }
2322 // NonTemporal vector memory ops must be aligned.
2323 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2324 // NT loads can only be vector aligned, so if it's less aligned than the
2325 // minimum vector size (which we can split the vector down to), we might as
2326 // well use a regular unaligned vector load.
2327 // We don't have any NT loads pre-SSE41.
2328 if (!!(Flags & MachineMemOperand::MOLoad))
2329 return (Align < 16 || !Subtarget.hasSSE41());
2330 return false;
2331 }
2332 // Misaligned accesses of any size are always allowed.
2333 return true;
2334}
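// --- Editorial sketch (not part of the original X86ISelLowering.cpp) --------
// The non-temporal rule above, restated as a tiny standalone predicate
// (hypothetical helper, reduced to the two inputs it actually depends on):
namespace sketch {
inline bool allowsMisalignedNTVector(bool IsLoad, unsigned AlignBytes,
                                     bool HasSSE41) {
  // NT vector stores must always be aligned. NT vector loads additionally
  // require SSE4.1 (MOVNTDQA); otherwise we report "allowed" and fall back
  // to a plain unaligned vector load.
  if (IsLoad)
    return AlignBytes < 16 || !HasSSE41;
  return false;
}
} // namespace sketch
// ----------------------------------------------------------------------------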
2335
2336/// Return the entry encoding for a jump table in the
2337/// current function. The returned value is a member of the
2338/// MachineJumpTableInfo::JTEntryKind enum.
2339unsigned X86TargetLowering::getJumpTableEncoding() const {
2340 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2341 // symbol.
2342 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2343 return MachineJumpTableInfo::EK_Custom32;
2344
2345 // Otherwise, use the normal jump table encoding heuristics.
2346 return TargetLowering::getJumpTableEncoding();
2347}
2348
2349bool X86TargetLowering::useSoftFloat() const {
2350 return Subtarget.useSoftFloat();
2351}
2352
2353void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2354 ArgListTy &Args) const {
2355
2356 // Only relabel X86-32 for C / Stdcall CCs.
2357 if (Subtarget.is64Bit())
2358 return;
2359 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2360 return;
2361 unsigned ParamRegs = 0;
2362 if (auto *M = MF->getFunction().getParent())
2363 ParamRegs = M->getNumberRegisterParameters();
2364
2365 // Mark the first N int arguments as having reg
2366 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2367 Type *T = Args[Idx].Ty;
2368 if (T->isIntOrPtrTy())
2369 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2370 unsigned numRegs = 1;
2371 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2372 numRegs = 2;
2373 if (ParamRegs < numRegs)
2374 return;
2375 ParamRegs -= numRegs;
2376 Args[Idx].IsInReg = true;
2377 }
2378 }
2379}
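// --- Editorial sketch (not part of the original X86ISelLowering.cpp) --------
// The inreg-marking loop above implements 32-bit regparm assignment for
// libcall arguments. A simplified standalone model (hypothetical helper;
// takes the byte sizes of the integer/pointer arguments, each <= 8):
#include <cstddef>
#include <vector>

namespace sketch {
// Returns, per argument, whether it is passed in registers given `ParamRegs`
// available 4-byte register slots.
inline std::vector<bool> assignRegParams(const std::vector<unsigned> &ArgSizes,
                                         unsigned ParamRegs) {
  std::vector<bool> InReg(ArgSizes.size(), false);
  for (std::size_t I = 0; I < ArgSizes.size(); ++I) {
    unsigned NumRegs = ArgSizes[I] > 4 ? 2 : 1; // an i64 takes two 32-bit regs
    if (ParamRegs < NumRegs)
      break; // stop at the first argument that no longer fits
    ParamRegs -= NumRegs;
    InReg[I] = true;
  }
  return InReg;
}
} // namespace sketch
// e.g. assignRegParams({4, 8, 4}, /*regparm=*/3) marks the first two arguments
// (1 + 2 register slots) and leaves the third on the stack.
// ----------------------------------------------------------------------------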
2380
2381const MCExpr *
2382X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2383 const MachineBasicBlock *MBB,
2384 unsigned uid,MCContext &Ctx) const{
2385 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
2386 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2387 // entries.
2388 return MCSymbolRefExpr::create(MBB->getSymbol(),
2389 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2390}
2391
2392/// Returns relocation base for the given PIC jumptable.
2393SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2394 SelectionDAG &DAG) const {
2395 if (!Subtarget.is64Bit())
2396 // This doesn't have SDLoc associated with it, but is not really the
2397 // same as a Register.
2398 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2399 getPointerTy(DAG.getDataLayout()));
2400 return Table;
2401}
2402
2403/// This returns the relocation base for the given PIC jumptable,
2404/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2405const MCExpr *X86TargetLowering::
2406getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2407 MCContext &Ctx) const {
2408 // X86-64 uses RIP relative addressing based on the jump table label.
2409 if (Subtarget.isPICStyleRIPRel())
2410 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2411
2412 // Otherwise, the reference is relative to the PIC base.
2413 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2414}
2415
2416std::pair<const TargetRegisterClass *, uint8_t>
2417X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2418 MVT VT) const {
2419 const TargetRegisterClass *RRC = nullptr;
2420 uint8_t Cost = 1;
2421 switch (VT.SimpleTy) {
2422 default:
2423 return TargetLowering::findRepresentativeClass(TRI, VT);
2424 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2425 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2426 break;
2427 case MVT::x86mmx:
2428 RRC = &X86::VR64RegClass;
2429 break;
2430 case MVT::f32: case MVT::f64:
2431 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2432 case MVT::v4f32: case MVT::v2f64:
2433 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2434 case MVT::v8f32: case MVT::v4f64:
2435 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2436 case MVT::v16f32: case MVT::v8f64:
2437 RRC = &X86::VR128XRegClass;
2438 break;
2439 }
2440 return std::make_pair(RRC, Cost);
2441}
2442
2443unsigned X86TargetLowering::getAddressSpace() const {
2444 if (Subtarget.is64Bit())
2445 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2446 return 256;
2447}
2448
2449static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2450 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2451 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2452}
2453
2454static Constant* SegmentOffset(IRBuilder<> &IRB,
2455 unsigned Offset, unsigned AddressSpace) {
2456 return ConstantExpr::getIntToPtr(
2457 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2458 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2459}
2460
2461Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2462 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2463 // tcbhead_t; use it instead of the usual global variable (see
2464 // sysdeps/{i386,x86_64}/nptl/tls.h)
2465 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2466 if (Subtarget.isTargetFuchsia()) {
2467 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2468 return SegmentOffset(IRB, 0x10, getAddressSpace());
2469 } else {
2470 // %fs:0x28, unless we're using a Kernel code model, in which case
2471 // it's %gs:0x28. gs:0x14 on i386.
2472 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2473 return SegmentOffset(IRB, Offset, getAddressSpace());
2474 }
2475 }
2476
2477 return TargetLowering::getIRStackGuard(IRB);
2478}
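// Rough sketch of the effect (not part of the original source): on x86-64
// glibc the guard is read from %fs:0x28 and on i386 from %gs:0x14, so the
// stack-protector check compiles to roughly
//   movq %fs:40, %rax      # 64-bit
//   movl %gs:20, %eax      # 32-bit
// with SegmentOffset producing the matching inttoptr constant in the %fs/%gs
// address space returned by getAddressSpace().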
2479
2480void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2481 // MSVC CRT provides functionalities for stack protection.
2482 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2483 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2484 // MSVC CRT has a global variable holding security cookie.
2485 M.getOrInsertGlobal("__security_cookie",
2486 Type::getInt8PtrTy(M.getContext()));
2487
2488 // MSVC CRT has a function to validate security cookie.
2489 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2490 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2491 Type::getInt8PtrTy(M.getContext()));
2492 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2493 F->setCallingConv(CallingConv::X86_FastCall);
2494 F->addAttribute(1, Attribute::AttrKind::InReg);
2495 }
2496 return;
2497 }
2498 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2499 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2500 return;
2501 TargetLowering::insertSSPDeclarations(M);
2502}
2503
2504Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2505 // MSVC CRT has a global variable holding security cookie.
2506 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2507 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2508 return M.getGlobalVariable("__security_cookie");
2509 }
2510 return TargetLowering::getSDagStackGuard(M);
2511}
2512
2513Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2514 // MSVC CRT has a function to validate security cookie.
2515 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2516 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2517 return M.getFunction("__security_check_cookie");
2518 }
2519 return TargetLowering::getSSPStackGuardCheck(M);
2520}
2521
2522Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2523 if (Subtarget.getTargetTriple().isOSContiki())
2524 return getDefaultSafeStackPointerLocation(IRB, false);
2525
2526 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2527 // definition of TLS_SLOT_SAFESTACK in
2528 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2529 if (Subtarget.isTargetAndroid()) {
2530 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:0x48.
2531 // %gs:0x24 on i386.
2532 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2533 return SegmentOffset(IRB, Offset, getAddressSpace());
2534 }
2535
2536 // Fuchsia is similar.
2537 if (Subtarget.isTargetFuchsia()) {
2538 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2539 return SegmentOffset(IRB, 0x18, getAddressSpace());
2540 }
2541
2542 return TargetLowering::getSafeStackPointerLocation(IRB);
2543}
2544
2545//===----------------------------------------------------------------------===//
2546// Return Value Calling Convention Implementation
2547//===----------------------------------------------------------------------===//
2548
2549bool X86TargetLowering::CanLowerReturn(
2550 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2551 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2552 SmallVector<CCValAssign, 16> RVLocs;
2553 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2554 return CCInfo.CheckReturn(Outs, RetCC_X86);
2555}
2556
2557const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2558 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2559 return ScratchRegs;
2560}
2561
2562 /// Lowers mask values (v*i1) to the local register values
2563/// \returns DAG node after lowering to register type
2564static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2565 const SDLoc &Dl, SelectionDAG &DAG) {
2566 EVT ValVT = ValArg.getValueType();
2567
2568 if (ValVT == MVT::v1i1)
2569 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2570 DAG.getIntPtrConstant(0, Dl));
2571
2572 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2573 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2574 // Two stage lowering might be required
2575 // bitcast: v8i1 -> i8 / v16i1 -> i16
2576 // anyextend: i8 -> i32 / i16 -> i32
2577 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2578 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2579 if (ValLoc == MVT::i32)
2580 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2581 return ValToCopy;
2582 }
2583
2584 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2585 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2586 // One stage lowering is required
2587 // bitcast: v32i1 -> i32 / v64i1 -> i64
2588 return DAG.getBitcast(ValLoc, ValArg);
2589 }
2590
2591 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2592}
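// Worked example (illustrative, mirroring the cases above): returning a v16i1
// mask in a 32-bit location goes through two nodes,
//   t1: i16 = bitcast t0 (v16i1)
//   t2: i32 = any_extend t1
// while v32i1 -> i32 (or v64i1 -> i64) is a single bitcast.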
2593
2594 /// Breaks a v64i1 value into two registers and adds the new nodes to the DAG
2595static void Passv64i1ArgInRegs(
2596 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2597 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
2598 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2599 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
2600 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2601 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
2602 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2603 "The value should reside in two registers");
2604
2605 // Before splitting the value we cast it to i64
2606 Arg = DAG.getBitcast(MVT::i64, Arg);
2607
2608 // Splitting the value into two i32 types
2609 SDValue Lo, Hi;
2610 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2611 DAG.getConstant(0, Dl, MVT::i32));
2612 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2613 DAG.getConstant(1, Dl, MVT::i32));
2614
2615 // Attach the two i32 types into corresponding registers
2616 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2617 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2618}
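// Illustrative example (not from the original source): with AVX512BW on a
// 32-bit target, a v64i1 value V that the calling convention assigns to the
// register pair (EAX, EDX) is emitted as
//   t1: i64 = bitcast V
//   Lo: i32 = extract_element t1, 0   -> first register (e.g. EAX)
//   Hi: i32 = extract_element t1, 1   -> second register (e.g. EDX)
// The concrete register pair comes from the two CCValAssign locations.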
2619
2620SDValue
2621X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2622 bool isVarArg,
2623 const SmallVectorImpl<ISD::OutputArg> &Outs,
2624 const SmallVectorImpl<SDValue> &OutVals,
2625 const SDLoc &dl, SelectionDAG &DAG) const {
2626 MachineFunction &MF = DAG.getMachineFunction();
2627 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2628
2629 // In some cases we need to disable registers from the default CSR list.
2630 // For example, when they are used for argument passing.
2631 bool ShouldDisableCalleeSavedRegister =
2632 CallConv == CallingConv::X86_RegCall ||
2633 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2634
2635 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2636 report_fatal_error("X86 interrupts may not return any value");
2637
2638 SmallVector<CCValAssign, 16> RVLocs;
2639 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2640 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2641
2642 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
2643 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2644 ++I, ++OutsIndex) {
2645 CCValAssign &VA = RVLocs[I];
2646 assert(VA.isRegLoc() && "Can only return in registers!");
2647
2648 // Add the register to the CalleeSaveDisableRegs list.
2649 if (ShouldDisableCalleeSavedRegister)
2650 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2651
2652 SDValue ValToCopy = OutVals[OutsIndex];
2653 EVT ValVT = ValToCopy.getValueType();
2654
2655 // Promote values to the appropriate types.
2656 if (VA.getLocInfo() == CCValAssign::SExt)
2657 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2658 else if (VA.getLocInfo() == CCValAssign::ZExt)
2659 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2660 else if (VA.getLocInfo() == CCValAssign::AExt) {
2661 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2662 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2663 else
2664 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2665 }
2666 else if (VA.getLocInfo() == CCValAssign::BCvt)
2667 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2668
2669 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2670 "Unexpected FP-extend for return value.");
2671
2672 // Report an error if we have attempted to return a value via an XMM
2673 // register and SSE was disabled.
2674 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
2675 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2676 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2677 } else if (!Subtarget.hasSSE2() &&
2678 X86::FR64XRegClass.contains(VA.getLocReg()) &&
2679 ValVT == MVT::f64) {
2680 // When returning a double via an XMM register, report an error if SSE2 is
2681 // not enabled.
2682 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2683 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2684 }
2685
2686 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2687 // the RET instruction and handled by the FP Stackifier.
2688 if (VA.getLocReg() == X86::FP0 ||
2689 VA.getLocReg() == X86::FP1) {
2690 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2691 // change the value to the FP stack register class.
2692 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2693 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2694 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2695 // Don't emit a copytoreg.
2696 continue;
2697 }
2698
2699 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2700 // which is returned in RAX / RDX.
2701 if (Subtarget.is64Bit()) {
2702 if (ValVT == MVT::x86mmx) {
2703 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2704 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2705 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2706 ValToCopy);
2707 // If we don't have SSE2 available, convert to v4f32 so the generated
2708 // register is legal.
2709 if (!Subtarget.hasSSE2())
2710 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2711 }
2712 }
2713 }
2714
2715 if (VA.needsCustom()) {
2716 assert(VA.getValVT() == MVT::v64i1 &&
2717 "Currently the only custom case is when we split v64i1 to 2 regs");
2718
2719 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
2720 Subtarget);
2721
2722 // Add the second register to the CalleeSaveDisableRegs list.
2723 if (ShouldDisableCalleeSavedRegister)
2724 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2725 } else {
2726 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2727 }
2728 }
2729
2730 SDValue Flag;
2731 SmallVector<SDValue, 6> RetOps;
2732 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2733 // Operand #1 = Bytes To Pop
2734 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2735 MVT::i32));
2736
2737 // Copy the result values into the output registers.
2738 for (auto &RetVal : RetVals) {
2739 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
2740 RetOps.push_back(RetVal.second);
2741 continue; // Don't emit a copytoreg.
2742 }
2743
2744 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
2745 Flag = Chain.getValue(1);
2746 RetOps.push_back(
2747 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
2748 }
2749
2750 // Swift calling convention does not require we copy the sret argument
2751 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2752
2753 // All x86 ABIs require that for returning structs by value we copy
2754 // the sret argument into %rax/%eax (depending on ABI) for the return.
2755 // We saved the argument into a virtual register in the entry block,
2756 // so now we copy the value out and into %rax/%eax.
2757 //
2758 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2759 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2760 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2761 // either case FuncInfo->setSRetReturnReg() will have been called.
2762 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
2763 // When we have both sret and another return value, we should use the
2764 // original Chain stored in RetOps[0], instead of the current Chain updated
2765 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2766
2767 // For the case of sret and another return value, we have
2768 // Chain_0 at the function entry
2769 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2770 // If we use Chain_1 in getCopyFromReg, we will have
2771 // Val = getCopyFromReg(Chain_1)
2772 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2773
2774 // getCopyToReg(Chain_0) will be glued together with
2775 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2776 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2777 // Data dependency from Unit B to Unit A due to usage of Val in
2778 // getCopyToReg(Chain_1, Val)
2779 // Chain dependency from Unit A to Unit B
2780
2781 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2782 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2783 getPointerTy(MF.getDataLayout()));
2784
2785 Register RetValReg
2786 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2787 X86::RAX : X86::EAX;
2788 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2789 Flag = Chain.getValue(1);
2790
2791 // RAX/EAX now acts like a return value.
2792 RetOps.push_back(
2793 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2794
2795 // Add the returned register to the CalleeSaveDisableRegs list.
2796 if (ShouldDisableCalleeSavedRegister)
2797 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2798 }
2799
2800 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2801 const MCPhysReg *I =
2802 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2803 if (I) {
2804 for (; *I; ++I) {
2805 if (X86::GR64RegClass.contains(*I))
2806 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2807 else
2808 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2809 }
2810 }
2811
2812 RetOps[0] = Chain; // Update chain.
2813
2814 // Add the flag if we have it.
2815 if (Flag.getNode())
2816 RetOps.push_back(Flag);
2817
2818 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2819 if (CallConv == CallingConv::X86_INTR)
2820 opcode = X86ISD::IRET;
2821 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2822}
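// Shape of the node built above (illustrative): a return of one value in EAX
// with no callee stack cleanup ends up as
//   X86ISD::RET_FLAG Chain, TargetConstant:i32<0>, Register:i32 $eax, Glue
// where operand #1 is the bytes-to-pop amount and the trailing glue ties the
// final CopyToReg to the return.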
2823
2824bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2825 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2826 return false;
2827
2828 SDValue TCChain = Chain;
2829 SDNode *Copy = *N->use_begin();
2830 if (Copy->getOpcode() == ISD::CopyToReg) {
2831 // If the copy has a glue operand, we conservatively assume it isn't safe to
2832 // perform a tail call.
2833 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2834 return false;
2835 TCChain = Copy->getOperand(0);
2836 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2837 return false;
2838
2839 bool HasRet = false;
2840 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2841 UI != UE; ++UI) {
2842 if (UI->getOpcode() != X86ISD::RET_FLAG)
2843 return false;
2844 // If we are returning more than one value, we can definitely
2845 // not make a tail call (see PR19530).
2846 if (UI->getNumOperands() > 4)
2847 return false;
2848 if (UI->getNumOperands() == 4 &&
2849 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2850 return false;
2851 HasRet = true;
2852 }
2853
2854 if (!HasRet)
2855 return false;
2856
2857 Chain = TCChain;
2858 return true;
2859}
2860
2861EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2862 ISD::NodeType ExtendKind) const {
2863 MVT ReturnMVT = MVT::i32;
2864
2865 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2866 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2867 // The ABI does not require i1, i8 or i16 to be extended.
2868 //
2869 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2870 // always extending i8/i16 return values, so keep doing that for now.
2871 // (PR26665).
2872 ReturnMVT = MVT::i8;
2873 }
2874
2875 EVT MinVT = getRegisterType(Context, ReturnMVT);
2876 return VT.bitsLT(MinVT) ? MinVT : VT;
2877}
2878
2879/// Reads two 32 bit registers and creates a 64 bit mask value.
2880 /// \param VA The current 32 bit value that needs to be assigned.
2881 /// \param NextVA The next 32 bit value that needs to be assigned.
2882/// \param Root The parent DAG node.
2883 /// \param [in,out] InFlag Represents the SDValue in the parent DAG node used
2884 /// for glue purposes. In case the DAG is already using a
2885 /// physical register instead of a virtual one, we should
2886 /// glue our new SDValue to the InFlag SDValue.
2887 /// \return a new SDValue of size 64 bit.
2888static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2889 SDValue &Root, SelectionDAG &DAG,
2890 const SDLoc &Dl, const X86Subtarget &Subtarget,
2891 SDValue *InFlag = nullptr) {
2892 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
2893 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2894 assert(VA.getValVT() == MVT::v64i1 &&
2895 "Expecting first location of 64 bit width type");
2896 assert(NextVA.getValVT() == VA.getValVT() &&
2897 "The locations should have the same type");
2898 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2899 "The values should reside in two registers");
2900
2901 SDValue Lo, Hi;
2902 SDValue ArgValueLo, ArgValueHi;
2903
2904 MachineFunction &MF = DAG.getMachineFunction();
2905 const TargetRegisterClass *RC = &X86::GR32RegClass;
2906
2907 // Read a 32 bit value from the registers.
2908 if (nullptr == InFlag) {
2909 // When no physical register is present,
2910 // create an intermediate virtual register.
2911 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
2912 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2913 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2914 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2915 } else {
2916 // When a physical register is available read the value from it and glue
2917 // the reads together.
2918 ArgValueLo =
2919 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2920 *InFlag = ArgValueLo.getValue(2);
2921 ArgValueHi =
2922 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2923 *InFlag = ArgValueHi.getValue(2);
2924 }
2925
2926 // Convert the i32 type into v32i1 type.
2927 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2928
2929 // Convert the i32 type into v32i1 type.
2930 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2931
2932 // Concatenate the two values together.
2933 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2934}
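// Illustrative shape of the result (restating the steps above): for a v64i1
// argument split across two 32-bit registers, the DAG built here is
//   Lo:  v32i1 = bitcast (CopyFromReg ... first register)
//   Hi:  v32i1 = bitcast (CopyFromReg ... second register)
//   Res: v64i1 = concat_vectors Lo, Hi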
2935
2936/// The function will lower a register of various sizes (8/16/32/64)
2937/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2938 /// \returns a DAG node containing the operand after lowering to mask type.
2939static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2940 const EVT &ValLoc, const SDLoc &Dl,
2941 SelectionDAG &DAG) {
2942 SDValue ValReturned = ValArg;
2943
2944 if (ValVT == MVT::v1i1)
2945 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2946
2947 if (ValVT == MVT::v64i1) {
2948 // On a 32 bit machine this case is handled by getv64i1Argument.
2949 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
2950 // On a 64 bit machine there is no need to truncate the value, only bitcast it.
2951 } else {
2952 MVT maskLen;
2953 switch (ValVT.getSimpleVT().SimpleTy) {
2954 case MVT::v8i1:
2955 maskLen = MVT::i8;
2956 break;
2957 case MVT::v16i1:
2958 maskLen = MVT::i16;
2959 break;
2960 case MVT::v32i1:
2961 maskLen = MVT::i32;
2962 break;
2963 default:
2964 llvm_unreachable("Expecting a vector of i1 types");
2965 }
2966
2967 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2968 }
2969 return DAG.getBitcast(ValVT, ValReturned);
2970}
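// Worked example (illustrative): a v8i1 mask that was returned in a 32-bit
// location is recovered as
//   t1: i8 = truncate t0 (i32)
//   t2: v8i1 = bitcast t1
// whereas a v64i1 value in an i64 location needs only the final bitcast.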
2971
2972/// Lower the result values of a call into the
2973/// appropriate copies out of appropriate physical registers.
2974///
2975SDValue X86TargetLowering::LowerCallResult(
2976 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2977 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2978 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2979 uint32_t *RegMask) const {
2980
2981 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2982 // Assign locations to each value returned by this call.
2983 SmallVector<CCValAssign, 16> RVLocs;
2984 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2985 *DAG.getContext());
2986 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2987
2988 // Copy all of the result registers out of their specified physreg.
2989 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2990 ++I, ++InsIndex) {
2991 CCValAssign &VA = RVLocs[I];
2992 EVT CopyVT = VA.getLocVT();
2993
2994 // In some calling conventions we need to remove the used registers
2995 // from the register mask.
2996 if (RegMask) {
2997 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2998 SubRegs.isValid(); ++SubRegs)
2999 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3000 }
3001
3002 // Report an error if there was an attempt to return FP values via XMM
3003 // registers.
3004 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3005 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3006 if (VA.getLocReg() == X86::XMM1)
3007 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3008 else
3009 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3010 } else if (!Subtarget.hasSSE2() &&
3011 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3012 CopyVT == MVT::f64) {
3013 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3014 if (VA.getLocReg() == X86::XMM1)
3015 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3016 else
3017 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3018 }
3019
3020 // If we prefer to use the value in xmm registers, copy it out as f80 and
3021 // use a truncate to move it from fp stack reg to xmm reg.
3022 bool RoundAfterCopy = false;
3023 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3024 isScalarFPTypeInSSEReg(VA.getValVT())) {
3025 if (!Subtarget.hasX87())
3026 report_fatal_error("X87 register return with X87 disabled");
3027 CopyVT = MVT::f80;
3028 RoundAfterCopy = (CopyVT != VA.getLocVT());
3029 }
3030
3031 SDValue Val;
3032 if (VA.needsCustom()) {
3033 assert(VA.getValVT() == MVT::v64i1 &&
3034 "Currently the only custom case is when we split v64i1 to 2 regs");
3035 Val =
3036 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3037 } else {
3038 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3039 .getValue(1);
3040 Val = Chain.getValue(0);
3041 InFlag = Chain.getValue(2);
3042 }
3043
3044 if (RoundAfterCopy)
3045 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3046 // This truncation won't change the value.
3047 DAG.getIntPtrConstant(1, dl));
3048
3049 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
3050 if (VA.getValVT().isVector() &&
3051 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3052 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3053 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3054 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3055 } else
3056 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3057 }
3058
3059 if (VA.getLocInfo() == CCValAssign::BCvt)
3060 Val = DAG.getBitcast(VA.getValVT(), Val);
3061
3062 InVals.push_back(Val);
3063 }
3064
3065 return Chain;
3066}
3067
3068//===----------------------------------------------------------------------===//
3069// C & StdCall & Fast Calling Convention implementation
3070//===----------------------------------------------------------------------===//
3071 // The StdCall calling convention is the standard for many Windows API
3072 // routines. It differs from the C calling convention only slightly: the
3073 // callee, not the caller, cleans up the stack, and symbols are also
3074 // decorated in some fancy way :) It doesn't support any vector arguments.
3075 // For info on the fast calling convention (tail call), see its
3076 // implementation in LowerX86_32FastCCCallTo.
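// Small example of the decoration and callee cleanup (a hedged sketch of
// standard Win32 behaviour, not anything specific to this file): for
//   int __stdcall f(int, int);
// the symbol is emitted as _f@8 and the function returns with `ret $8`,
// popping its own 8 bytes of arguments.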
3077
3078/// CallIsStructReturn - Determines whether a call uses struct return
3079/// semantics.
3080enum StructReturnType {
3081 NotStructReturn,
3082 RegStructReturn,
3083 StackStructReturn
3084};
3085static StructReturnType
3086callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
3087 if (Outs.empty())
3088 return NotStructReturn;
3089
3090 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
3091 if (!Flags.isSRet())
3092 return NotStructReturn;
3093 if (Flags.isInReg() || IsMCU)
3094 return RegStructReturn;
3095 return StackStructReturn;
3096}
3097
3098/// Determines whether a function uses struct return semantics.
3099static StructReturnType
3100argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
3101 if (Ins.empty())
3102 return NotStructReturn;
3103
3104 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
3105 if (!Flags.isSRet())
3106 return NotStructReturn;
3107 if (Flags.isInReg() || IsMCU)
3108 return RegStructReturn;
3109 return StackStructReturn;
3110}
3111
3112/// Make a copy of an aggregate at address specified by "Src" to address
3113/// "Dst" with size and alignment information specified by the specific
3114/// parameter attribute. The copy will be passed as a byval function parameter.
3115static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3116 SDValue Chain, ISD::ArgFlagsTy Flags,
3117 SelectionDAG &DAG, const SDLoc &dl) {
3118 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3119
3120 return DAG.getMemcpy(
3121 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3122 /*isVolatile*/ false, /*AlwaysInline=*/true,
3123 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3124}
3125
3126/// Return true if the calling convention is one that we can guarantee TCO for.
3127static bool canGuaranteeTCO(CallingConv::ID CC) {
3128 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3129 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3130 CC == CallingConv::HHVM || CC == CallingConv::Tail);
3131}
3132
3133/// Return true if we might ever do TCO for calls with this calling convention.
3134static bool mayTailCallThisCC(CallingConv::ID CC) {
3135 switch (CC) {
3136 // C calling conventions:
3137 case CallingConv::C:
3138 case CallingConv::Win64:
3139 case CallingConv::X86_64_SysV:
3140 // Callee pop conventions:
3141 case CallingConv::X86_ThisCall:
3142 case CallingConv::X86_StdCall:
3143 case CallingConv::X86_VectorCall:
3144 case CallingConv::X86_FastCall:
3145 // Swift:
3146 case CallingConv::Swift:
3147 return true;
3148 default:
3149 return canGuaranteeTCO(CC);
3150 }
3151}
3152
3153/// Return true if the function is being made into a tailcall target by
3154/// changing its ABI.
3155static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3156 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
3157}
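// Illustrative IR (assumed typical usage, not from this file): calls using the
// `tailcc` convention always get the guaranteed-TCO ABI treatment,
//   %r = tail call tailcc i32 @callee(i32 %x)
// while fastcc/GHC/HiPE/RegCall/HHVM only get it when -tailcallopt
// (GuaranteedTailCallOpt) is enabled.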
3158
3159bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3160 if (!CI->isTailCall())
3161 return false;
3162
3163 CallingConv::ID CalleeCC = CI->getCallingConv();
3164 if (!mayTailCallThisCC(CalleeCC))
3165 return false;
3166
3167 return true;
3168}
3169
3170SDValue
3171X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3172 const SmallVectorImpl<ISD::InputArg> &Ins,
3173 const SDLoc &dl, SelectionDAG &DAG,
3174 const CCValAssign &VA,
3175 MachineFrameInfo &MFI, unsigned i) const {
3176 // Create the nodes corresponding to a load from this parameter slot.
3177 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3178 bool AlwaysUseMutable = shouldGuaranteeTCO(
3179 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3180 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3181 EVT ValVT;
3182 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3183
3184 // If the value is passed by pointer, we have the address passed instead of
3185 // the value itself. No need to extend if the mask value and location share
3186 // the same absolute size.
3187 bool ExtendedInMem =
3188 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3189 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3190
3191 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3192 ValVT = VA.getLocVT();
3193 else
3194 ValVT = VA.getValVT();
3195
3196 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3197 // changed with more analysis.
3198 // In case of tail call optimization, mark all arguments mutable, since they
3199 // could be overwritten by the lowering of arguments in case of a tail call.
3200 if (Flags.isByVal()) {
3201 unsigned Bytes = Flags.getByValSize();
3202 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3203
3204 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3205 // can be improved with deeper analysis.
3206 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3207 /*isAliased=*/true);
3208 return DAG.getFrameIndex(FI, PtrVT);
3209 }
3210
3211 EVT ArgVT = Ins[i].ArgVT;
3212
3213 // If this is a vector that has been split into multiple parts, and the
3214 // scalar size of the parts don't match the vector element size, then we can't
3215 // elide the copy. The parts will have padding between them instead of being
3216 // packed like a vector.
3217 bool ScalarizedAndExtendedVector =
3218 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3219 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3220
3221 // This is an argument in memory. We might be able to perform copy elision.
3222 // If the argument is passed directly in memory without any extension, then we
3223 // can perform copy elision. Large vector types, for example, may be passed
3224 // indirectly by pointer.
3225 if (Flags.isCopyElisionCandidate() &&
3226 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3227 !ScalarizedAndExtendedVector) {
3228 SDValue PartAddr;
3229 if (Ins[i].PartOffset == 0) {
3230 // If this is a one-part value or the first part of a multi-part value,
3231 // create a stack object for the entire argument value type and return a
3232 // load from our portion of it. This assumes that if the first part of an
3233 // argument is in memory, the rest will also be in memory.
3234 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3235 /*IsImmutable=*/false);
3236 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3237 return DAG.getLoad(
3238 ValVT, dl, Chain, PartAddr,
3239 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3240 } else {
3241 // This is not the first piece of an argument in memory. See if there is
3242 // already a fixed stack object including this offset. If so, assume it
3243 // was created by the PartOffset == 0 branch above and create a load from
3244 // the appropriate offset into it.
3245 int64_t PartBegin = VA.getLocMemOffset();
3246 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3247 int FI = MFI.getObjectIndexBegin();
3248 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3249 int64_t ObjBegin = MFI.getObjectOffset(FI);
3250 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3251 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3252 break;
3253 }
3254 if (MFI.isFixedObjectIndex(FI)) {
3255 SDValue Addr =
3256 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3257 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3258 return DAG.getLoad(
3259 ValVT, dl, Chain, Addr,
3260 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3261 Ins[i].PartOffset));
3262 }
3263 }
3264 }
3265
3266 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3267 VA.getLocMemOffset(), isImmutable);
3268
3269 // Set SExt or ZExt flag.
3270 if (VA.getLocInfo() == CCValAssign::ZExt) {
3271 MFI.setObjectZExt(FI, true);
3272 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3273 MFI.setObjectSExt(FI, true);
3274 }
3275
3276 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3277 SDValue Val = DAG.getLoad(
3278 ValVT, dl, Chain, FIN,
3279 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3280 return ExtendedInMem
3281 ? (VA.getValVT().isVector()
3282 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3283 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3284 : Val;
3285}
3286
3287// FIXME: Get this from tablegen.
3288static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3289 const X86Subtarget &Subtarget) {
3290 assert(Subtarget.is64Bit());
3291
3292 if (Subtarget.isCallingConvWin64(CallConv)) {
3293 static const MCPhysReg GPR64ArgRegsWin64[] = {
3294 X86::RCX, X86::RDX, X86::R8, X86::R9
3295 };
3296 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3297 }
3298
3299 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3300 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3301 };
3302 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3303}
3304
3305// FIXME: Get this from tablegen.
3306static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3307 CallingConv::ID CallConv,
3308 const X86Subtarget &Subtarget) {
3309 assert(Subtarget.is64Bit());
3310 if (Subtarget.isCallingConvWin64(CallConv)) {
3311 // The XMM registers which might contain var arg parameters are shadowed
3312 // in their paired GPR. So we only need to save the GPR to their home
3313 // slots.
3314 // TODO: __vectorcall will change this.
3315 return None;
3316 }
3317
3318 const Function &F = MF.getFunction();
3319 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3320 bool isSoftFloat = Subtarget.useSoftFloat();
3321 assert(!(isSoftFloat && NoImplicitFloatOps) &&
3322 "SSE register cannot be used when SSE is disabled!");
3323 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3324 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3325 // registers.
3326 return None;
3327
3328 static const MCPhysReg XMMArgRegs64Bit[] = {
3329 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3330 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3331 };
3332 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3333}
3334
3335#ifndef NDEBUG
3336static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3337 return llvm::is_sorted(
3338 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3339 return A.getValNo() < B.getValNo();
3340 });
3341}
3342#endif
3343
3344namespace {
3345/// This is a helper class for lowering variable arguments parameters.
3346class VarArgsLoweringHelper {
3347public:
3348 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3349 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3350 CallingConv::ID CallConv, CCState &CCInfo)
3351 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3352 TheMachineFunction(DAG.getMachineFunction()),
3353 TheFunction(TheMachineFunction.getFunction()),
3354 FrameInfo(TheMachineFunction.getFrameInfo()),
3355 FrameLowering(*Subtarget.getFrameLowering()),
3356 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3357 CCInfo(CCInfo) {}
3358
3359 // Lower variable arguments parameters.
3360 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3361
3362private:
3363 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3364
3365 void forwardMustTailParameters(SDValue &Chain);
3366
3367 bool is64Bit() { return Subtarget.is64Bit(); }
3368 bool isWin64() { return Subtarget.isCallingConvWin64(CallConv); }
3369
3370 X86MachineFunctionInfo *FuncInfo;
3371 const SDLoc &DL;
3372 SelectionDAG &DAG;
3373 const X86Subtarget &Subtarget;
3374 MachineFunction &TheMachineFunction;
3375 const Function &TheFunction;
3376 MachineFrameInfo &FrameInfo;
3377 const TargetFrameLowering &FrameLowering;
3378 const TargetLowering &TargLowering;
3379 CallingConv::ID CallConv;
3380 CCState &CCInfo;
3381};
3382} // namespace
3383
3384void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3385 SDValue &Chain, unsigned StackSize) {
3386 // If the function takes variable number of arguments, make a frame index for
3387 // the start of the first vararg value... for expansion of llvm.va_start. We
3388 // can skip this if there are no va_start calls.
3389 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3390 CallConv != CallingConv::X86_ThisCall)) {
3391 FuncInfo->setVarArgsFrameIndex(
3392 FrameInfo.CreateFixedObject(1, StackSize, true));
3393 }
3394
3395 // Figure out if XMM registers are in use.
3396 assert(!(Subtarget.useSoftFloat() &&
3397 TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) &&
3398 "SSE register cannot be used when SSE is disabled!");
3399
3400 // 64-bit calling conventions support varargs and register parameters, so we
3401 // have to do extra work to spill them in the prologue.
3402 if (is64Bit()) {
3403 // Find the first unallocated argument registers.
3404 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3405 ArrayRef<MCPhysReg> ArgXMMs =
3406 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3407 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3408 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3409
3410 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3411 "SSE register cannot be used when SSE is disabled!");
3412
3413 if (isWin64()) {
3414 // Get to the caller-allocated home save location. Add 8 to account
3415 // for the return address.
3416 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3417 FuncInfo->setRegSaveFrameIndex(
3418 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3419 // Fixup to set vararg frame on shadow area (4 x i64).
3420 if (NumIntRegs < 4)
3421 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3422 } else {
3423 // For X86-64, if there are vararg parameters that are passed via
3424 // registers, then we must store them to their spots on the stack so
3425 // they may be loaded by dereferencing the result of va_next.
3426 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3427 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3428 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3429 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3430 }
3431
3432 SmallVector<SDValue, 6>
3433 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3434 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3435 // keeping live input value
3436 SDValue ALVal; // if applicable keeps SDValue for %al register
3437
3438 // Gather all the live in physical registers.
3439 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3440 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3441 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3442 }
3443 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3444 if (!AvailableXmms.empty()) {
3445 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3446 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3447 for (MCPhysReg Reg : AvailableXmms) {
3448 Register XMMReg = TheMachineFunction.addLiveIn(Reg, &X86::VR128RegClass);
3449 LiveXMMRegs.push_back(
3450 DAG.getCopyFromReg(Chain, DL, XMMReg, MVT::v4f32));
3451 }
3452 }
3453
3454 // Store the integer parameter registers.
3455 SmallVector<SDValue, 8> MemOps;
3456 SDValue RSFIN =
3457 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3458 TargLowering.getPointerTy(DAG.getDataLayout()));
3459 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3460 for (SDValue Val : LiveGPRs) {
3461 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3462 TargLowering.getPointerTy(DAG.getDataLayout()),
3463 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3464 SDValue Store =
3465 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3466 MachinePointerInfo::getFixedStack(
3467 DAG.getMachineFunction(),
3468 FuncInfo->getRegSaveFrameIndex(), Offset));
3469 MemOps.push_back(Store);
3470 Offset += 8;
3471 }
3472
3473 // Now store the XMM (fp + vector) parameter registers.
3474 if (!LiveXMMRegs.empty()) {
3475 SmallVector<SDValue, 12> SaveXMMOps;
3476 SaveXMMOps.push_back(Chain);
3477 SaveXMMOps.push_back(ALVal);
3478 SaveXMMOps.push_back(
3479 DAG.getIntPtrConstant(FuncInfo->getRegSaveFrameIndex(), DL));
3480 SaveXMMOps.push_back(
3481 DAG.getIntPtrConstant(FuncInfo->getVarArgsFPOffset(), DL));
3482 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3483 LiveXMMRegs.end());
3484 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
3485 MVT::Other, SaveXMMOps));
3486 }
3487
3488 if (!MemOps.empty())
3489 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3490 }
3491}
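// Resulting layout on SysV x86-64 (illustrative summary of the code above):
// the register save area is ArgGPRs.size()*8 + ArgXMMs.size()*16
// = 6*8 + 8*16 = 176 bytes; gp_offset starts at NumIntRegs*8 (range 0..48)
// and fp_offset at 48 + NumXMMRegs*16, matching the va_list fields that
// va_arg walks.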
3492
3493void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3494 // Find the largest legal vector type.
3495 MVT VecVT = MVT::Other;
3496 // FIXME: Only some x86_32 calling conventions support AVX512.
3497 if (Subtarget.useAVX512Regs() &&
3498 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3499 CallConv == CallingConv::Intel_OCL_BI)))
3500 VecVT = MVT::v16f32;
3501 else if (Subtarget.hasAVX())
3502 VecVT = MVT::v8f32;
3503 else if (Subtarget.hasSSE2())
3504 VecVT = MVT::v4f32;
3505
3506 // We forward some GPRs and some vector types.
3507 SmallVector<MVT, 2> RegParmTypes;
3508 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3509 RegParmTypes.push_back(IntVT);
3510 if (VecVT != MVT::Other)
3511 RegParmTypes.push_back(VecVT);
3512
3513 // Compute the set of forwarded registers. The rest are scratch.
3514 SmallVectorImpl<ForwardedRegister> &Forwards =
3515 FuncInfo->getForwardedMustTailRegParms();
3516 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3517
3518 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3519 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3520 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3521 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3522 }
3523
3524 // Copy all forwards from physical to virtual registers.
3525 for (ForwardedRegister &FR : Forwards) {
3526 // FIXME: Can we use a less constrained schedule?
3527 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3528 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
3529 TargLowering.getRegClassFor(FR.VT));
3530 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
3531 }
3532}
3533
3534void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
3535 unsigned StackSize) {
3536 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
3537 // If necessary, it would be set into the correct value later.
3538 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3539 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3540
3541 if (FrameInfo.hasVAStart())
3542 createVarArgAreaAndStoreRegisters(Chain, StackSize);
3543
3544 if (FrameInfo.hasMustTailInVarArgFunc())
3545 forwardMustTailParameters(Chain);
3546}
3547
3548SDValue X86TargetLowering::LowerFormalArguments(
3549 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3550 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3551 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3552 MachineFunction &MF = DAG.getMachineFunction();
3553 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3554
3555 const Function &F = MF.getFunction();
3556 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3557 F.getName() == "main")
3558 FuncInfo->setForceFramePointer(true);
3559
3560 MachineFrameInfo &MFI = MF.getFrameInfo();
3561 bool Is64Bit = Subtarget.is64Bit();
3562 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3563
3564 assert(
3565 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
3566 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
3567
3568 // Assign locations to all of the incoming arguments.
3569 SmallVector<CCValAssign, 16> ArgLocs;
3570 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3571
3572 // Allocate shadow area for Win64.
3573 if (IsWin64)
3574 CCInfo.AllocateStack(32, Align(8));
3575
3576 CCInfo.AnalyzeArguments(Ins, CC_X86);
3577
3578 // In vectorcall calling convention a second pass is required for the HVA
3579 // types.
3580 if (CallingConv::X86_VectorCall == CallConv) {
3581 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3582 }
3583
3584 // The next loop assumes that the locations are in the same order as the
3585 // input arguments.
3586 assert(isSortedByValueNo(ArgLocs) &&
3587 "Argument Location list must be sorted before lowering");
3588
3589 SDValue ArgValue;
3590 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3591 ++I, ++InsIndex) {
3592 assert(InsIndex < Ins.size() && "Invalid Ins index");
3593 CCValAssign &VA = ArgLocs[I];
3594
3595 if (VA.isRegLoc()) {
3596 EVT RegVT = VA.getLocVT();
3597 if (VA.needsCustom()) {
3598 assert(
3599 VA.getValVT() == MVT::v64i1 &&
3600 "Currently the only custom case is when we split v64i1 to 2 regs");
3601
3602 // In the regcall calling convention, v64i1 values that are
3603 // compiled for a 32-bit arch are split up into two registers.
3604 ArgValue =
3605 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3606 } else {
3607 const TargetRegisterClass *RC;
3608 if (RegVT == MVT::i8)
3609 RC = &X86::GR8RegClass;
3610 else if (RegVT == MVT::i16)
3611 RC = &X86::GR16RegClass;
3612 else if (RegVT == MVT::i32)
3613 RC = &X86::GR32RegClass;
3614 else if (Is64Bit && RegVT == MVT::i64)
3615 RC = &X86::GR64RegClass;
3616 else if (RegVT == MVT::f32)
3617 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3618 else if (RegVT == MVT::f64)
3619 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3620 else if (RegVT == MVT::f80)
3621 RC = &X86::RFP80RegClass;
3622 else if (RegVT == MVT::f128)
3623 RC = &X86::VR128RegClass;
3624 else if (RegVT.is512BitVector())
3625 RC = &X86::VR512RegClass;
3626 else if (RegVT.is256BitVector())
3627 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3628 else if (RegVT.is128BitVector())
3629 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3630 else if (RegVT == MVT::x86mmx)
3631 RC = &X86::VR64RegClass;
3632 else if (RegVT == MVT::v1i1)
3633 RC = &X86::VK1RegClass;
3634 else if (RegVT == MVT::v8i1)
3635 RC = &X86::VK8RegClass;
3636 else if (RegVT == MVT::v16i1)
3637 RC = &X86::VK16RegClass;
3638 else if (RegVT == MVT::v32i1)
3639 RC = &X86::VK32RegClass;
3640 else if (RegVT == MVT::v64i1)
3641 RC = &X86::VK64RegClass;
3642 else
3643 llvm_unreachable("Unknown argument type!");
3644
3645 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3646 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3647 }
3648
3649 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3650 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3651 // right size.
3652 if (VA.getLocInfo() == CCValAssign::SExt)
3653 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3654 DAG.getValueType(VA.getValVT()));
3655 else if (VA.getLocInfo() == CCValAssign::ZExt)
3656 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3657 DAG.getValueType(VA.getValVT()));
3658 else if (VA.getLocInfo() == CCValAssign::BCvt)
3659 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3660
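// For example (a sketch, assuming an i8 argument marked zeroext and passed
// in a 32-bit GPR): the CopyFromReg above yields an i32, the AssertZext
// records that only the low 8 bits carry meaning, and the TRUNCATE emitted
// in the isExtInLoc() case below narrows it back to i8:
//   t1: i32 = CopyFromReg %edi
//   t2: i32 = AssertZext t1, i8
//   t3: i8  = truncate t2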
3661 if (VA.isExtInLoc()) {
3662 // Handle MMX values passed in XMM regs.
3663 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3664 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3665 else if (VA.getValVT().isVector() &&
3666 VA.getValVT().getScalarType() == MVT::i1 &&
3667 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3668 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3669 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3670 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3671 } else
3672 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3673 }
3674 } else {
3675 assert(VA.isMemLoc());
3676 ArgValue =
3677 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3678 }
3679
3680 // If value is passed via pointer - do a load.
3681 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3682 ArgValue =
3683 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3684
3685 InVals.push_back(ArgValue);
3686 }
3687
3688 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3689 // Swift calling convention does not require we copy the sret argument
3690 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3691 if (CallConv == CallingConv::Swift)
3692 continue;
3693
3694 // All x86 ABIs require that for returning structs by value we copy the
3695 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3696 // the argument into a virtual register so that we can access it from the
3697 // return points.
3698 if (Ins[I].Flags.isSRet()) {
3699 Register Reg = FuncInfo->getSRetReturnReg();
3700 if (!Reg) {
3701 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3702 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3703 FuncInfo->setSRetReturnReg(Reg);
3704 }
3705 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3706 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3707 break;
3708 }
3709 }
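// For example (a sketch): for a 32-bit function "struct S f(void)" lowered
// with an sret pointer, the hidden pointer argument captured here is what the
// return lowering later copies into %eax, as the ABI requires.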
3710
3711 unsigned StackSize = CCInfo.getNextStackOffset();
3712 // Align stack specially for tail calls.
3713 if (shouldGuaranteeTCO(CallConv,
3714 MF.getTarget().Options.GuaranteedTailCallOpt))
3715 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3716
3717 if (IsVarArg)
3718 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
3719 .lowerVarArgsParameters(Chain, StackSize);
3720
3721 // Some CCs need callee pop.
3722 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
3723 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3724 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3725 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3726 // X86 interrupts must pop the error code (and the alignment padding) if
3727 // present.
3728 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3729 } else {
3730 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3731 // If this is an sret function, the return should pop the hidden pointer.
3732 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3733 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3734 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3735 FuncInfo->setBytesToPopOnReturn(4);
3736 }
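// Illustrative values of BytesToPopOnReturn for a few common cases (a sketch,
// not an exhaustive list):
//   32-bit stdcall "void f(int, int)"       -> 8  (callee pops all stack args)
//   x86-64 interrupt handler w/ error code  -> 16 (8-byte code + 8-byte padding)
//   32-bit cdecl sret (non-MSVCRT)          -> 4  (ret $4 pops the hidden pointer)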
3737
3738 if (!Is64Bit) {
3739 // RegSaveFrameIndex is X86-64 only.
3740 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3741 }
3742
3743 FuncInfo->setArgumentStackSize(StackSize);
3744
3745 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3746 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3747 if (Personality == EHPersonality::CoreCLR) {
3748 assert(Is64Bit);
3749 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3750 // that we'd prefer this slot be allocated towards the bottom of the frame
3751 // (i.e. near the stack pointer after allocating the frame). Every
3752 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3753 // offset from the bottom of this and each funclet's frame must be the
3754 // same, so the size of funclets' (mostly empty) frames is dictated by
3755 // how far this slot is from the bottom (since they allocate just enough
3756 // space to accommodate holding this slot at the correct offset).
3757 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSS=*/false);
3758 EHInfo->PSPSymFrameIdx = PSPSymFI;
3759 }
3760 }
3761
3762 if (CallConv == CallingConv::X86_RegCall ||
3763 F.hasFnAttribute("no_caller_saved_registers")) {
3764 MachineRegisterInfo &MRI = MF.getRegInfo();
3765 for (std::pair<Register, Register> Pair : MRI.liveins())
3766 MRI.disableCalleeSavedRegister(Pair.first);
3767 }
3768
3769 return Chain;
3770}
3771
3772SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3773 SDValue Arg, const SDLoc &dl,
3774 SelectionDAG &DAG,
3775 const CCValAssign &VA,
3776 ISD::ArgFlagsTy Flags,
3777 bool isByVal) const {
3778 unsigned LocMemOffset = VA.getLocMemOffset();
3779 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3780 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3781 StackPtr, PtrOff);
3782 if (isByVal)
3783 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3784
3785 return DAG.getStore(
3786 Chain, dl, Arg, PtrOff,
3787 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3788}
3789
3790/// Emit a load of return address if tail call
3791/// optimization is performed and it is required.
3792SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3793 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3794 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3795 // Adjust the Return address stack slot.
3796 EVT VT = getPointerTy(DAG.getDataLayout());
3797 OutRetAddr = getReturnAddressFrameIndex(DAG);
3798
3799 // Load the "old" Return address.
3800 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3801 return SDValue(OutRetAddr.getNode(), 1);
3802}
3803
3804/// Emit a store of the return address if tail call
3805/// optimization is performed and it is required (FPDiff!=0).
3806static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3807 SDValue Chain, SDValue RetAddrFrIdx,
3808 EVT PtrVT, unsigned SlotSize,
3809 int FPDiff, const SDLoc &dl) {
3810 // Store the return address to the appropriate stack slot.
3811 if (!FPDiff) return Chain;
3812 // Calculate the new stack slot for the return address.
3813 int NewReturnAddrFI =
3814 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3815 false);
3816 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3817 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3818 MachinePointerInfo::getFixedStack(
3819 DAG.getMachineFunction(), NewReturnAddrFI));
3820 return Chain;
3821}
3822
3823 /// Returns a vector_shuffle mask for a movs{s|d}, movd
3824 /// operation of the specified width.
3825static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3826 SDValue V2) {
3827 unsigned NumElems = VT.getVectorNumElements();
3828 SmallVector<int, 8> Mask;
3829 Mask.push_back(NumElems);
3830 for (unsigned i = 1; i != NumElems; ++i)
3831 Mask.push_back(i);
3832 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3833}
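// For example, for VT = v4f32 the mask built above is <4, 1, 2, 3>: indices
// NumElems..2*NumElems-1 refer to V2, so lane 0 comes from V2 and lanes 1-3
// from V1 - the MOVSS/MOVSD "replace the low element" behaviour.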
3834
3835SDValue
3836X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3837 SmallVectorImpl<SDValue> &InVals) const {
3838 SelectionDAG &DAG = CLI.DAG;
3839 SDLoc &dl = CLI.DL;
3840 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3841 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3842 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3843 SDValue Chain = CLI.Chain;
3844 SDValue Callee = CLI.Callee;
3845 CallingConv::ID CallConv = CLI.CallConv;
3846 bool &isTailCall = CLI.IsTailCall;
3847 bool isVarArg = CLI.IsVarArg;
3848
3849 MachineFunction &MF = DAG.getMachineFunction();
3850 bool Is64Bit = Subtarget.is64Bit();
3851 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3852 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3853 bool IsSibcall = false;
3854 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
3855 CallConv == CallingConv::Tail;
3856 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3857 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CB);
3858 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3859 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3860 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3861 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CB);
3862 bool HasNoCfCheck =
3863 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3864 const Module *M = MF.getMMI().getModule();
3865 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3866
3867 MachineFunction::CallSiteInfo CSInfo;
3868 if (CallConv == CallingConv::X86_INTR)
3869 report_fatal_error("X86 interrupts may not be called directly");
3870
3871 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
3872 // If we are using a GOT, disable tail calls to external symbols with
3873 // default visibility. Tail calling such a symbol requires using a GOT
3874 // relocation, which forces early binding of the symbol. This breaks code
3875 // that requires lazy function symbol resolution. Using musttail or
3876 // GuaranteedTailCallOpt will override this.
3877 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3878 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3879 G->getGlobal()->hasDefaultVisibility()))
3880 isTailCall = false;
3881 }
3882
3883 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
3884 if (IsMustTail) {
3885 // Force this to be a tail call. The verifier rules are enough to ensure
3886 // that we can lower this successfully without moving the return address
3887 // around.
3888 isTailCall = true;
3889 } else if (isTailCall) {
3890 // Check if it's really possible to do a tail call.
3891 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3892 isVarArg, SR != NotStructReturn,
3893 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3894 Outs, OutVals, Ins, DAG);
3895
3896 // Sibcalls are automatically detected tailcalls which do not require
3897 // ABI changes.
3898 if (!IsGuaranteeTCO && isTailCall)
3899 IsSibcall = true;
3900
3901 if (isTailCall)
3902 ++NumTailCalls;
3903 }
3904
3905 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3906 "Var args not supported with calling convention fastcc, ghc or hipe");
3907
3908 // Analyze operands of the call, assigning locations to each operand.
3909 SmallVector<CCValAssign, 16> ArgLocs;
3910 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3911
3912 // Allocate shadow area for Win64.
3913 if (IsWin64)
3914 CCInfo.AllocateStack(32, Align(8));
3915
3916 CCInfo.AnalyzeArguments(Outs, CC_X86);
3917
3918 // In vectorcall calling convention a second pass is required for the HVA
3919 // types.
3920 if (CallingConv::X86_VectorCall == CallConv) {
3921 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3922 }
3923
3924 // Get a count of how many bytes are to be pushed on the stack.
3925 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3926 if (IsSibcall)
3927 // This is a sibcall. The memory operands are already available in the
3928 // caller's incoming argument stack area.
3929 NumBytes = 0;
3930 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
3931 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3932
3933 int FPDiff = 0;
3934 if (isTailCall && !IsSibcall && !IsMustTail) {
3935 // Lower arguments at fp - stackoffset + fpdiff.
3936 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3937
3938 FPDiff = NumBytesCallerPushed - NumBytes;
3939
3940 // Set the delta of movement of the returnaddr stackslot.
3941 // But only set if delta is greater than previous delta.
3942 if (FPDiff < X86Info->getTCReturnAddrDelta())
3943 X86Info->setTCReturnAddrDelta(FPDiff);
3944 }
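// A worked example of the FPDiff computation above (a sketch, assuming
// GuaranteedTailCallOpt with fastcc): if this function was entered with 8
// bytes of stack arguments (NumBytesCallerPushed = 8) and the tail callee
// needs 24 bytes (NumBytes = 24), then FPDiff = 8 - 24 = -16 and the return
// address slot has to move 16 bytes further down the stack.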
3945
3946 unsigned NumBytesToPush = NumBytes;
3947 unsigned NumBytesToPop = NumBytes;
3948
3949 // If we have an inalloca argument, all stack space has already been allocated
3950 // for us and will be right at the top of the stack. We don't support multiple
3951 // arguments passed in memory when using inalloca.
3952 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3953 NumBytesToPush = 0;
3954 if (!ArgLocs.back().isMemLoc())
3955 report_fatal_error("cannot use inalloca attribute on a register "
3956 "parameter");
3957 if (ArgLocs.back().getLocMemOffset() != 0)
3958 report_fatal_error("any parameter with the inalloca attribute must be "
3959 "the only memory argument");
3960 } else if (CLI.IsPreallocated) {
3961 assert(ArgLocs.back().isMemLoc() &&
3962 "cannot use preallocated attribute on a register "
3963 "parameter");
3964 SmallVector<size_t, 4> PreallocatedOffsets;
3965 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
3966 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
3967 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
3968 }
3969 }
3970 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
3971 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
3972 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
3973 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
3974 NumBytesToPush = 0;
3975 }
3976
3977 if (!IsSibcall && !IsMustTail)
3978 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3979 NumBytes - NumBytesToPush, dl);
3980
3981 SDValue RetAddrFrIdx;
3982 // Load return address for tail calls.
3983 if (isTailCall && FPDiff)
3984 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3985 Is64Bit, FPDiff, dl);
3986
3987 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
3988 SmallVector<SDValue, 8> MemOpChains;
3989 SDValue StackPtr;
3990
3991 // The next loop assumes that the locations are in the same order as the
3992 // input arguments.
3993 assert(isSortedByValueNo(ArgLocs) &&
3994 "Argument Location list must be sorted before lowering");
3995
3996 // Walk the register/memloc assignments, inserting copies/loads. In the case
3997 // of tail call optimization, arguments are handled later.
3998 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3999 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4000 ++I, ++OutIndex) {
4001 assert(OutIndex < Outs.size() && "Invalid Out index");
4002 // Skip inalloca/preallocated arguments, they have already been written.
4003 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4004 if (Flags.isInAlloca() || Flags.isPreallocated())
4005 continue;
4006
4007 CCValAssign &VA = ArgLocs[I];
4008 EVT RegVT = VA.getLocVT();
4009 SDValue Arg = OutVals[OutIndex];
4010 bool isByVal = Flags.isByVal();
4011
4012 // Promote the value if needed.
4013 switch (VA.getLocInfo()) {
4014 default: llvm_unreachable("Unknown loc info!");
4015 case CCValAssign::Full: break;
4016 case CCValAssign::SExt:
4017 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4018 break;
4019 case CCValAssign::ZExt:
4020 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4021 break;
4022 case CCValAssign::AExt:
4023 if (Arg.getValueType().isVector() &&
4024 Arg.getValueType().getVectorElementType() == MVT::i1)
4025 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4026 else if (RegVT.is128BitVector()) {
4027 // Special case: passing MMX values in XMM registers.
4028 Arg = DAG.getBitcast(MVT::i64, Arg);
4029 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4030 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4031 } else
4032 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4033 break;
4034 case CCValAssign::BCvt:
4035 Arg = DAG.getBitcast(RegVT, Arg);
4036 break;
4037 case CCValAssign::Indirect: {
4038 if (isByVal) {
4039 // Memcpy the argument to a temporary stack slot to prevent
4040 // the caller from seeing any modifications the callee may make
4041 // as guaranteed by the `byval` attribute.
4042 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4043 Flags.getByValSize(),
4044 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4045 SDValue StackSlot =
4046 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4047 Chain =
4048 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4049 // From now on treat this as a regular pointer
4050 Arg = StackSlot;
4051 isByVal = false;
4052 } else {
4053 // Store the argument.
4054 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4055 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4056 Chain = DAG.getStore(
4057 Chain, dl, Arg, SpillSlot,
4058 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4059 Arg = SpillSlot;
4060 }
4061 break;
4062 }
4063 }
4064
4065 if (VA.needsCustom()) {
4066 assert(VA.getValVT() == MVT::v64i1 &&
4067 "Currently the only custom case is when we split v64i1 to 2 regs");
4068 // Split v64i1 value into two registers
4069 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4070 } else if (VA.isRegLoc()) {
4071 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4072 const TargetOptions &Options = DAG.getTarget().Options;
4073 if (Options.EmitCallSiteInfo)
4074 CSInfo.emplace_back(VA.getLocReg(), I);
4075 if (isVarArg && IsWin64) {
4076 // The Win64 ABI requires an argument XMM reg to be copied to the
4077 // corresponding shadow reg if the callee is a varargs function.
4078 Register ShadowReg;
4079 switch (VA.getLocReg()) {
4080 case X86::XMM0: ShadowReg = X86::RCX; break;
4081 case X86::XMM1: ShadowReg = X86::RDX; break;
4082 case X86::XMM2: ShadowReg = X86::R8; break;
4083 case X86::XMM3: ShadowReg = X86::R9; break;
4084 }
4085 if (ShadowReg)
4086 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4087 }
4088 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4089 assert(VA.isMemLoc());
4090 if (!StackPtr.getNode())
4091 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4092 getPointerTy(DAG.getDataLayout()));
4093 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4094 dl, DAG, VA, Flags, isByVal));
4095 }
4096 }
4097
4098 if (!MemOpChains.empty())
4099 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4100
4101 if (Subtarget.isPICStyleGOT()) {
4102 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4103 // GOT pointer.
4104 if (!isTailCall) {
4105 RegsToPass.push_back(std::make_pair(
4106 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4107 getPointerTy(DAG.getDataLayout()))));
4108 } else {
4109 // If we are tail calling and generating PIC/GOT style code load the
4110 // address of the callee into ECX. The value in ecx is used as target of
4111 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4112 // for tail calls on PIC/GOT architectures. Normally we would just put the
4113 // address of GOT into ebx and then call target@PLT. But for tail calls
4114 // ebx would be restored (since ebx is callee saved) before jumping to the
4115 // target@PLT.
4116
4117 // Note: The actual moving to ECX is done further down.
4118 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4119 if (G && !G->getGlobal()->hasLocalLinkage() &&
4120 G->getGlobal()->hasDefaultVisibility())
4121 Callee = LowerGlobalAddress(Callee, DAG);
4122 else if (isa<ExternalSymbolSDNode>(Callee))
4123 Callee = LowerExternalSymbol(Callee, DAG);
4124 }
4125 }
4126
4127 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
4128 // From AMD64 ABI document:
4129 // For calls that may call functions that use varargs or stdargs
4130 // (prototype-less calls or calls to functions containing ellipsis (...) in
4131 // the declaration) %al is used as a hidden argument to specify the number
4132 // of SSE registers used. The contents of %al do not need to match exactly
4133 // the number of registers, but must be an upper bound on the number of SSE
4134 // registers used and is in the range 0 - 8 inclusive.
4135
4136 // Count the number of XMM registers allocated.
4137 static const MCPhysReg XMMArgRegs[] = {
4138 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4139 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4140 };
4141 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4142 assert((Subtarget.hasSSE1() || !NumXMMRegs)
4143 && "SSE registers cannot be used when SSE is disabled");
4144 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4145 DAG.getConstant(NumXMMRegs, dl,
4146 MVT::i8)));
4147 }
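// For example (a sketch of the contract described above): for a call such as
// printf("%f %f\n", x, y) the two doubles travel in XMM0/XMM1, so the
// constant copied into AL here would be 2; any value from 2 to 8 would also
// be a valid upper bound.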
4148
4149 if (isVarArg && IsMustTail) {
4150 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4151 for (const auto &F : Forwards) {
4152 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4153 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4154 }
4155 }
4156
4157 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4158 // don't need this because the eligibility check rejects calls that require
4159 // shuffling arguments passed in memory.
4160 if (!IsSibcall && isTailCall) {
4161 // Force all the incoming stack arguments to be loaded from the stack
4162 // before any new outgoing arguments are stored to the stack, because the
4163 // outgoing stack slots may alias the incoming argument stack slots, and
4164 // the alias isn't otherwise explicit. This is slightly more conservative
4165 // than necessary, because it means that each store effectively depends
4166 // on every argument instead of just those arguments it would clobber.
4167 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4168
4169 SmallVector<SDValue, 8> MemOpChains2;
4170 SDValue FIN;
4171 int FI = 0;
4172 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4173 ++I, ++OutsIndex) {
4174 CCValAssign &VA = ArgLocs[I];
4175
4176 if (VA.isRegLoc()) {
4177 if (VA.needsCustom()) {
4178 assert((CallConv == CallingConv::X86_RegCall) &&
4179 "Expecting custom case only in regcall calling convention");
4180 // This means that we are in a special case where one argument was
4181 // passed through two register locations - skip the next location.
4182 ++I;
4183 }
4184
4185 continue;
4186 }
4187
4188 assert(VA.isMemLoc());
4189 SDValue Arg = OutVals[OutsIndex];
4190 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4191 // Skip inalloca/preallocated arguments. They don't require any work.
4192 if (Flags.isInAlloca() || Flags.isPreallocated())
4193 continue;
4194 // Create frame index.
4195 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4196 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4197 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4198 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4199
4200 if (Flags.isByVal()) {
4201 // Copy relative to framepointer.
4202 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4203 if (!StackPtr.getNode())
4204 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4205 getPointerTy(DAG.getDataLayout()));
4206 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4207 StackPtr, Source);
4208
4209 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4210 ArgChain,
4211 Flags, DAG, dl));
4212 } else {
4213 // Store relative to framepointer.
4214 MemOpChains2.push_back(DAG.getStore(
4215 ArgChain, dl, Arg, FIN,
4216 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4217 }
4218 }
4219
4220 if (!MemOpChains2.empty())
4221 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4222
4223 // Store the return address to the appropriate stack slot.
4224 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4225 getPointerTy(DAG.getDataLayout()),
4226 RegInfo->getSlotSize(), FPDiff, dl);
4227 }
4228
4229 // Build a sequence of copy-to-reg nodes chained together with token chain
4230 // and flag operands which copy the outgoing args into registers.
4231 SDValue InFlag;
4232 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4233 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4234 RegsToPass[i].second, InFlag);
4235 InFlag = Chain.getValue(1);
4236 }
4237
4238 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4239 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
4240 // In the 64-bit large code model, we have to make all calls
4241 // through a register, since the call instruction's 32-bit
4242 // pc-relative offset may not be large enough to hold the whole
4243 // address.
4244 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4245 Callee->getOpcode() == ISD::ExternalSymbol) {
4246 // Lower direct calls to global addresses and external symbols. Setting
4247 // ForCall to true here has the effect of removing WrapperRIP when possible
4248 // to allow direct calls to be selected without first materializing the
4249 // address into a register.
4250 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4251 } else if (Subtarget.isTarget64BitILP32() &&
4252 Callee->getValueType(0) == MVT::i32) {
4253 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4254 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4255 }
4256
4257 // Returns a chain & a flag for retval copy to use.
4258 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4259 SmallVector<SDValue, 8> Ops;
4260
4261 if (!IsSibcall && isTailCall && !IsMustTail) {
4262 Chain = DAG.getCALLSEQ_END(Chain,
4263 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4264 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4265 InFlag = Chain.getValue(1);
4266 }
4267
4268 Ops.push_back(Chain);
4269 Ops.push_back(Callee);
4270
4271 if (isTailCall)
4272 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
4273
4274 // Add argument registers to the end of the list so that they are known live
4275 // into the call.
4276 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4277 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4278 RegsToPass[i].second.getValueType()));
4279
4280 // Add a register mask operand representing the call-preserved registers.
4281 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
4282 // set X86_INTR calling convention because it has the same CSR mask
4283 // (same preserved registers).
4284 const uint32_t *Mask = RegInfo->getCallPreservedMask(
4285 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
4286 assert(Mask && "Missing call preserved mask for calling convention");
4287
4288 // If this is an invoke in a 32-bit function using a funclet-based
4289 // personality, assume the function clobbers all registers. If an exception
4290 // is thrown, the runtime will not restore CSRs.
4291 // FIXME: Model this more precisely so that we can register allocate across
4292 // the normal edge and spill and fill across the exceptional edge.
4293 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4294 const Function &CallerFn = MF.getFunction();
4295 EHPersonality Pers =
4296 CallerFn.hasPersonalityFn()
4297 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4298 : EHPersonality::Unknown;
4299 if (isFuncletEHPersonality(Pers))
4300 Mask = RegInfo->getNoPreservedMask();
4301 }
4302
4303 // Define a new register mask from the existing mask.
4304 uint32_t *RegMask = nullptr;
4305
4306 // In some calling conventions we need to remove the used physical registers
4307 // from the reg mask.
4308 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4309 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4310
4311 // Allocate a new Reg Mask and copy Mask.
4312 RegMask = MF.allocateRegMask();
4313 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4314 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4315
4316 // Make sure all sub registers of the argument registers are reset
4317 // in the RegMask.
4318 for (auto const &RegPair : RegsToPass)
4319 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4320 SubRegs.isValid(); ++SubRegs)
4321 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4322
4323 // Create the RegMask Operand according to our updated mask.
4324 Ops.push_back(DAG.getRegisterMask(RegMask));
4325 } else {
4326 // Create the RegMask Operand according to the static mask.
4327 Ops.push_back(DAG.getRegisterMask(Mask));
4328 }
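// A small worked example of the mask update above: register masks pack 32
// registers per uint32_t, with bit (Reg % 32) of word (Reg / 32) set when the
// register is preserved across the call. Clearing that bit for every argument
// register (and each of its sub-registers) tells the register allocator that,
// e.g., an ESI used to pass a regcall argument is clobbered by this call
// rather than preserved.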
4329
4330 if (InFlag.getNode())
4331 Ops.push_back(InFlag);
4332
4333 if (isTailCall) {
4334 // We used to do:
4335 //// If this is the first return lowered for this function, add the regs
4336 //// to the liveout set for the function.
4337 // This isn't right, although it's probably harmless on x86; liveouts
4338 // should be computed from returns not tail calls. Consider a void
4339 // function making a tail call to a function returning int.
4340 MF.getFrameInfo().setHasTailCall();
4341 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4342 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4343 return Ret;
4344 }
4345
4346 if (HasNoCfCheck && IsCFProtectionSupported) {
4347 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4348 } else {
4349 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4350 }
4351 InFlag = Chain.getValue(1);
4352 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4353 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4354
4355 // Save heapallocsite metadata.
4356 if (CLI.CB)
4357 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4358 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4359
4360 // Create the CALLSEQ_END node.
4361 unsigned NumBytesForCalleeToPop;
4362 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4363 DAG.getTarget().Options.GuaranteedTailCallOpt))
4364 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4365 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4366 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4367 SR == StackStructReturn)
4368 // If this is a call to a struct-return function, the callee
4369 // pops the hidden struct pointer, so we have to push it back.
4370 // This is common for Darwin/X86, Linux & Mingw32 targets.
4371 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4372 NumBytesForCalleeToPop = 4;
4373 else
4374 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4375
4376 // Returns a flag for retval copy to use.
4377 if (!IsSibcall) {
4378 Chain = DAG.getCALLSEQ_END(Chain,
4379 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4380 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4381 true),
4382 InFlag, dl);
4383 InFlag = Chain.getValue(1);
4384 }
4385
4386 // Handle result values, copying them out of physregs into vregs that we
4387 // return.
4388 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4389 InVals, RegMask);
4390}
4391
4392//===----------------------------------------------------------------------===//
4393// Fast Calling Convention (tail call) implementation
4394//===----------------------------------------------------------------------===//
4395
4396 // Like stdcall, the callee cleans up the arguments, except that ECX is
4397 // reserved for storing the tail-called function's address. Only 2 registers are
4398// free for argument passing (inreg). Tail call optimization is performed
4399// provided:
4400// * tailcallopt is enabled
4401// * caller/callee are fastcc
4402// On X86_64 architecture with GOT-style position independent code only local
4403// (within module) calls are supported at the moment.
4404 // To keep the stack aligned according to the platform ABI, the function
4405 // GetAlignedArgumentStackSize ensures that the argument delta is always a
4406 // multiple of the stack alignment. (Dynamic linkers need this - e.g. Darwin's dyld.)
4407 // If a tail-called function (the callee) has more arguments than the caller, the
4408// caller needs to make sure that there is room to move the RETADDR to. This is
4409// achieved by reserving an area the size of the argument delta right after the
4410// original RETADDR, but before the saved framepointer or the spilled registers
4411// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4412// stack layout:
4413// arg1
4414// arg2
4415// RETADDR
4416// [ new RETADDR
4417// move area ]
4418// (possible EBP)
4419// ESI
4420// EDI
4421// local1 ..
4422
4423 /// Align the stack size, e.g. to the form 16n + 12 for a 16-byte alignment
4424 /// requirement.
4425unsigned
4426X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4427 SelectionDAG &DAG) const {
4428 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4429 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4430 assert(StackSize % SlotSize == 0 &&
4431 "StackSize must be a multiple of SlotSize");
4432 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4433}
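// A worked example of the formula above, assuming SlotSize = 8 and a 16-byte
// stack alignment (the usual x86-64 case):
//   StackSize = 24 -> alignTo(32, 16) - 8 = 24 (already of the form 16n + 8)
//   StackSize = 32 -> alignTo(40, 16) - 8 = 40 (rounded up to the next 16n + 8)
// With SlotSize = 4 this yields the 16n + 12 shape mentioned in the comment
// above, keeping the stack pointer 16-byte aligned once the pushed return
// address is included.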
4434
4435/// Return true if the given stack call argument is already available in the
4436/// same position (relatively) of the caller's incoming argument stack.
4437static
4438bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4439 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4440 const X86InstrInfo *TII, const CCValAssign &VA) {
4441 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4442
4443 for (;;) {
4444 // Look through nodes that don't alter the bits of the incoming value.
4445 unsigned Op = Arg.getOpcode();
4446 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4447 Arg = Arg.getOperand(0);
4448 continue;
4449 }
4450 if (Op == ISD::TRUNCATE) {
4451 const SDValue &TruncInput = Arg.getOperand(0);
4452 if (TruncInput.getOpcode() == ISD::AssertZext &&
4453 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4454 Arg.getValueType()) {
4455 Arg = TruncInput.getOperand(0);
4456 continue;
4457 }
4458 }
4459 break;
4460 }
4461
4462 int FI = INT_MAX;
4463 if (Arg.getOpcode() == ISD::CopyFromReg) {
4464 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4465 if (!Register::isVirtualRegister(VR))
4466 return false;
4467 MachineInstr *Def = MRI->getVRegDef(VR);
4468 if (!Def)
4469 return false;
4470 if (!Flags.isByVal()) {
4471 if (!TII->isLoadFromStackSlot(*Def, FI))
4472 return false;
4473 } else {
4474 unsigned Opcode = Def->getOpcode();
4475 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4476 Opcode == X86::LEA64_32r) &&
4477 Def->getOperand(1).isFI()) {
4478 FI = Def->getOperand(1).getIndex();
4479 Bytes = Flags.getByValSize();
4480 } else
4481 return false;
4482 }
4483 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4484 if (Flags.isByVal())
4485 // ByVal argument is passed in as a pointer but it's now being
4486 // dereferenced. e.g.
4487 // define @foo(%struct.X* %A) {
4488 // tail call @bar(%struct.X* byval %A)
4489 // }
4490 return false;
4491 SDValue Ptr = Ld->getBasePtr();
4492 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4493 if (!FINode)
4494 return false;
4495 FI = FINode->getIndex();
4496 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4497 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4498 FI = FINode->getIndex();
4499 Bytes = Flags.getByValSize();
4500 } else
4501 return false;
4502
4503 assert(FI != INT_MAX);
4504 if (!MFI.isFixedObjectIndex(FI))
4505 return false;
4506
4507 if (Offset != MFI.getObjectOffset(FI))
4508 return false;
4509
4510 // If this is not byval, check that the argument stack object is immutable.
4511 // inalloca and argument copy elision can create mutable argument stack
4512 // objects. Byval objects can be mutated, but a byval call intends to pass the
4513 // mutated memory.
4514 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4515 return false;
4516
4517 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4518 // If the argument location is wider than the argument type, check that any
4519 // extension flags match.
4520 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4521 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4522 return false;
4523 }
4524 }
4525
4526 return Bytes == MFI.getObjectSize(FI);
4527}
4528
4529/// Check whether the call is eligible for tail call optimization. Targets
4530/// that want to do tail call optimization should implement this function.
4531bool X86TargetLowering::IsEligibleForTailCallOptimization(
4532 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4533 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4534 const SmallVectorImpl<ISD::OutputArg> &Outs,
4535 const SmallVectorImpl<SDValue> &OutVals,
4536 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4537 if (!mayTailCallThisCC(CalleeCC))
4538 return false;
4539
4540 // If -tailcallopt is specified, make fastcc functions tail-callable.
4541 MachineFunction &MF = DAG.getMachineFunction();
4542 const Function &CallerF = MF.getFunction();
4543
4544 // If the function return type is x86_fp80 and the callee return type is not,
4545 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4546 // perform a tailcall optimization here.
4547 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4548 return false;
4549
4550 CallingConv::ID CallerCC = CallerF.getCallingConv();
4551 bool CCMatch = CallerCC == CalleeCC;
4552 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4553 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4554 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4555 CalleeCC == CallingConv::Tail;
4556
4557 // Win64 functions have extra shadow space for argument homing. Don't do the
4558 // sibcall if the caller and callee have mismatched expectations for this
4559 // space.
4560 if (IsCalleeWin64 != IsCallerWin64)
4561 return false;
4562
4563 if (IsGuaranteeTCO) {
4564 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4565 return true;
4566 return false;
4567 }
4568
4569 // Look for obvious safe cases to perform tail call optimization that do not
4570 // require ABI changes. This is what gcc calls sibcall.
4571
4572 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4573 // emit a special epilogue.
4574 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4575 if (RegInfo->needsStackRealignment(MF))
4576 return false;
4577
4578 // Also avoid sibcall optimization if either caller or callee uses struct
4579 // return semantics.
4580 if (isCalleeStructRet || isCallerStructRet)
4581 return false;
4582
4583 // Do not sibcall optimize vararg calls unless all arguments are passed via
4584 // registers.
4585 LLVMContext &C = *DAG.getContext();
4586 if (isVarArg && !Outs.empty()) {
4587 // Optimizing for varargs on Win64 is unlikely to be safe without
4588 // additional testing.
4589 if (IsCalleeWin64 || IsCallerWin64)
4590 return false;
4591
4592 SmallVector<CCValAssign, 16> ArgLocs;
4593 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4594
4595 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4596 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4597 if (!ArgLocs[i].isRegLoc())
4598 return false;
4599 }
4600
4601 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4602 // stack. Therefore, if it's not used by the call it is not safe to optimize
4603 // this into a sibcall.
4604 bool Unused = false;
4605 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4606 if (!Ins[i].Used) {
4607 Unused = true;
4608 break;
4609 }
4610 }
4611 if (Unused) {
4612 SmallVector<CCValAssign, 16> RVLocs;
4613 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4614 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4615 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4616 CCValAssign &VA = RVLocs[i];
4617 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4618 return false;
4619 }
4620 }
4621
4622 // Check that the call results are passed in the same way.
4623 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4624 RetCC_X86, RetCC_X86))
4625 return false;
4626 // The callee has to preserve all registers the caller needs to preserve.
4627 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4628 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4629 if (!CCMatch) {
4630 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4631 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4632 return false;
4633 }
4634
4635 unsigned StackArgsSize = 0;
4636
4637 // If the callee takes no arguments then go on to check the results of the
4638 // call.
4639 if (!Outs.empty()) {
4640 // Check if stack adjustment is needed. For now, do not do this if any
4641 // argument is passed on the stack.
4642 SmallVector<CCValAssign, 16> ArgLocs;
4643 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4644
4645 // Allocate shadow area for Win64
4646 if (IsCalleeWin64)
4647 CCInfo.AllocateStack(32, Align(8));
4648
4649 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4650 StackArgsSize = CCInfo.getNextStackOffset();
4651
4652 if (CCInfo.getNextStackOffset()) {
4653 // Check if the arguments are already laid out in the right way as
4654 // the caller's fixed stack objects.
4655 MachineFrameInfo &MFI = MF.getFrameInfo();
4656 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4657 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4658 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4659 CCValAssign &VA = ArgLocs[i];
4660 SDValue Arg = OutVals[i];
4661 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4662 if (VA.getLocInfo() == CCValAssign::Indirect)
4663 return false;
4664 if (!VA.isRegLoc()) {
4665 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4666 MFI, MRI, TII, VA))
4667 return false;
4668 }
4669 }
4670 }
4671
4672 bool PositionIndependent = isPositionIndependent();
4673 // If the tailcall address may be in a register, then make sure it's
4674 // possible to register allocate for it. In 32-bit, the call address can
4675 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4676 // callee-saved registers are restored. These happen to be the same
4677 // registers used to pass 'inreg' arguments so watch out for those.
4678 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4679 !isa<ExternalSymbolSDNode>(Callee)) ||
4680 PositionIndependent)) {
4681 unsigned NumInRegs = 0;
4682 // In PIC we need an extra register to formulate the address computation
4683 // for the callee.
4684 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4685
4686 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4687 CCValAssign &VA = ArgLocs[i];
4688 if (!VA.isRegLoc())
4689 continue;
4690 Register Reg = VA.getLocReg();
4691 switch (Reg) {
4692 default: break;
4693 case X86::EAX: case X86::EDX: case X86::ECX:
4694 if (++NumInRegs == MaxInRegs)
4695 return false;
4696 break;
4697 }
4698 }
4699 }
4700
4701 const MachineRegisterInfo &MRI = MF.getRegInfo();
4702 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4703 return false;
4704 }
4705
4706 bool CalleeWillPop =
4707 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4708 MF.getTarget().Options.GuaranteedTailCallOpt);
4709
4710 if (unsigned BytesToPop =
4711 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4712 // If we have bytes to pop, the callee must pop them.
4713 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4714 if (!CalleePopMatches)
4715 return false;
4716 } else if (CalleeWillPop && StackArgsSize > 0) {
4717 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4718 return false;
4719 }
4720
4721 return true;
4722}
4723
4724FastISel *
4725X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4726 const TargetLibraryInfo *libInfo) const {
4727 return X86::createFastISel(funcInfo, libInfo);
4728}
4729
4730//===----------------------------------------------------------------------===//
4731// Other Lowering Hooks
4732//===----------------------------------------------------------------------===//
4733
4734static bool MayFoldLoad(SDValue Op) {
4735 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4736}
4737
4738static bool MayFoldIntoStore(SDValue Op) {
4739 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4740}
4741
4742static bool MayFoldIntoZeroExtend(SDValue Op) {
4743 if (Op.hasOneUse()) {
4744 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4745 return (ISD::ZERO_EXTEND == Opcode);
4746 }
4747 return false;
4748}
4749
4750static bool isTargetShuffle(unsigned Opcode) {
4751 switch(Opcode) {
4752 default: return false;
4753 case X86ISD::BLENDI:
4754 case X86ISD::PSHUFB:
4755 case X86ISD::PSHUFD:
4756 case X86ISD::PSHUFHW:
4757 case X86ISD::PSHUFLW:
4758 case X86ISD::SHUFP:
4759 case X86ISD::INSERTPS:
4760 case X86ISD::EXTRQI:
4761 case X86ISD::INSERTQI:
4762 case X86ISD::VALIGN:
4763 case X86ISD::PALIGNR:
4764 case X86ISD::VSHLDQ:
4765 case X86ISD::VSRLDQ:
4766 case X86ISD::MOVLHPS:
4767 case X86ISD::MOVHLPS:
4768 case X86ISD::MOVSHDUP:
4769 case X86ISD::MOVSLDUP:
4770 case X86ISD::MOVDDUP:
4771 case X86ISD::MOVSS:
4772 case X86ISD::MOVSD:
4773 case X86ISD::UNPCKL:
4774 case X86ISD::UNPCKH:
4775 case X86ISD::VBROADCAST:
4776 case X86ISD::VPERMILPI:
4777 case X86ISD::VPERMILPV:
4778 case X86ISD::VPERM2X128:
4779 case X86ISD::SHUF128:
4780 case X86ISD::VPERMIL2:
4781 case X86ISD::VPERMI:
4782 case X86ISD::VPPERM:
4783 case X86ISD::VPERMV:
4784 case X86ISD::VPERMV3:
4785 case X86ISD::VZEXT_MOVL:
4786 return true;
4787 }
4788}
4789
4790static bool isTargetShuffleVariableMask(unsigned Opcode) {
4791 switch (Opcode) {
4792 default: return false;
4793 // Target Shuffles.
4794 case X86ISD::PSHUFB:
4795 case X86ISD::VPERMILPV:
4796 case X86ISD::VPERMIL2:
4797 case X86ISD::VPPERM:
4798 case X86ISD::VPERMV:
4799 case X86ISD::VPERMV3:
4800 return true;
4801 // 'Faux' Target Shuffles.
4802 case ISD::OR:
4803 case ISD::AND:
4804 case X86ISD::ANDNP:
4805 return true;
4806 }
4807}
4808
4809static bool isTargetShuffleSplat(SDValue Op) {
4810 unsigned Opcode = Op.getOpcode();
4811 if (Opcode == ISD::EXTRACT_SUBVECTOR)
4812 return isTargetShuffleSplat(Op.getOperand(0));
4813 return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
4814}
4815
4816SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4817 MachineFunction &MF = DAG.getMachineFunction();
4818 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4819 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4820 int ReturnAddrIndex = FuncInfo->getRAIndex();
4821
4822 if (ReturnAddrIndex == 0) {
4823 // Set up a frame object for the return address.
4824 unsigned SlotSize = RegInfo->getSlotSize();
4825 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4826 -(int64_t)SlotSize,
4827 false);
4828 FuncInfo->setRAIndex(ReturnAddrIndex);
4829 }
4830
4831 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4832}
4833
4834bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4835 bool hasSymbolicDisplacement) {
4836 // Offset should fit into 32 bit immediate field.
4837 if (!isInt<32>(Offset))
4838 return false;
4839
4840 // If we don't have a symbolic displacement - we don't have any extra
4841 // restrictions.
4842 if (!hasSymbolicDisplacement)
4843 return true;
4844
4845 // FIXME: Some tweaks might be needed for medium code model.
4846 if (M != CodeModel::Small && M != CodeModel::Kernel)
4847 return false;
4848
4849 // For the small code model we assume that the last object ends at least 16MB
4850 // before the 2^31 boundary. We may also accept pretty large negative constants,
4851 // knowing that all objects are in the positive half of the address space.
4852 if (M == CodeModel::Small && Offset < 16*1024*1024)
4853 return true;
4854
4855 // For the kernel code model we know that all objects reside in the negative half
4856 // of the 32-bit address space. We must not accept negative offsets, since they may
4857 // be just out of range, but we may accept pretty large positive ones.
4858 if (M == CodeModel::Kernel && Offset >= 0)
4859 return true;
4860
4861 return false;
4862}
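
The displacement rules above reduce to a few integer range checks. The following standalone sketch mirrors that decision logic in plain C++ (the enum and the function name are illustrative stand-ins, not LLVM's API), so it can be compiled and run on its own:

#include <cassert>
#include <cstdint>
#include <limits>

enum class CodeModelKind { Small, Kernel, Large }; // simplified stand-in

// Mirrors the range tests: the offset must fit in a signed 32-bit immediate,
// and with a symbolic displacement only the small and kernel code models are
// accepted, each restricted to its own half of the address space.
static bool offsetSuitableSketch(int64_t Offset, CodeModelKind M,
                                 bool HasSymbolicDisplacement) {
  if (Offset < std::numeric_limits<int32_t>::min() ||
      Offset > std::numeric_limits<int32_t>::max())
    return false;                      // does not fit a 32-bit immediate
  if (!HasSymbolicDisplacement)
    return true;                       // no extra restrictions
  if (M == CodeModelKind::Small)
    return Offset < 16 * 1024 * 1024;  // keep 16MB of headroom below 2^31
  if (M == CodeModelKind::Kernel)
    return Offset >= 0;                // objects live in the negative half
  return false;
}

int main() {
  assert(offsetSuitableSketch(1 << 20, CodeModelKind::Small, true));
  assert(!offsetSuitableSketch(1LL << 40, CodeModelKind::Small, false));
  assert(!offsetSuitableSketch(-8, CodeModelKind::Kernel, true));
  return 0;
}
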
4863
4864/// Determines whether the callee is required to pop its own arguments.
4865/// Callee pop is necessary to support tail calls.
4866bool X86::isCalleePop(CallingConv::ID CallingConv,
4867 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4868 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4869 // can guarantee TCO.
4870 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4871 return true;
4872
4873 switch (CallingConv) {
4874 default:
4875 return false;
4876 case CallingConv::X86_StdCall:
4877 case CallingConv::X86_FastCall:
4878 case CallingConv::X86_ThisCall:
4879 case CallingConv::X86_VectorCall:
4880 return !is64Bit;
4881 }
4882}
4883
4884/// Return true if the condition is a signed comparison operation.
4885static bool isX86CCSigned(unsigned X86CC) {
4886 switch (X86CC) {
4887 default:
4888 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4888)
;
4889 case X86::COND_E:
4890 case X86::COND_NE:
4891 case X86::COND_B:
4892 case X86::COND_A:
4893 case X86::COND_BE:
4894 case X86::COND_AE:
4895 return false;
4896 case X86::COND_G:
4897 case X86::COND_GE:
4898 case X86::COND_L:
4899 case X86::COND_LE:
4900 return true;
4901 }
4902}
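
Whether a condition code is signed decides whether the later flag tests use the SF/OF-based conditions (G/GE/L/LE) or the CF-based ones (A/AE/B/BE). A minimal plain-C++ illustration of why the distinction matters for an identical bit pattern (the setup is purely illustrative):

#include <cassert>
#include <cstdint>

int main() {
  // The same 32-bit pattern, viewed signed and unsigned.
  int32_t AsSigned = -1;
  uint32_t AsUnsigned = (uint32_t)AsSigned; // 0xFFFFFFFF

  // Signed compare (COND_L/COND_G family): -1 < 1 is true.
  assert(AsSigned < 1);
  // Unsigned compare (COND_B/COND_A family): 0xFFFFFFFF < 1 is false.
  assert(!(AsUnsigned < 1u));
  return 0;
}
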
4903
4904static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4905 switch (SetCCOpcode) {
4906 default: llvm_unreachable("Invalid integer condition!");
4907 case ISD::SETEQ: return X86::COND_E;
4908 case ISD::SETGT: return X86::COND_G;
4909 case ISD::SETGE: return X86::COND_GE;
4910 case ISD::SETLT: return X86::COND_L;
4911 case ISD::SETLE: return X86::COND_LE;
4912 case ISD::SETNE: return X86::COND_NE;
4913 case ISD::SETULT: return X86::COND_B;
4914 case ISD::SETUGT: return X86::COND_A;
4915 case ISD::SETULE: return X86::COND_BE;
4916 case ISD::SETUGE: return X86::COND_AE;
4917 }
4918}
4919
4920/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4921/// condition code, returning the condition code and the LHS/RHS of the
4922/// comparison to make.
4923static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4924 bool isFP, SDValue &LHS, SDValue &RHS,
4925 SelectionDAG &DAG) {
4926 if (!isFP) {
4927 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4928 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4929 // X > -1 -> X == 0, jump !sign.
4930 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4931 return X86::COND_NS;
4932 }
4933 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4934 // X < 0 -> X == 0, jump on sign.
4935 return X86::COND_S;
4936 }
4937 if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
4938 // X >= 0 -> X == 0, jump on !sign.
4939 return X86::COND_NS;
4940 }
4941 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
4942 // X < 1 -> X <= 0
4943 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4944 return X86::COND_LE;
4945 }
4946 }
4947
4948 return TranslateIntegerX86CC(SetCCOpcode);
4949 }
4950
4951 // First determine if it is required or profitable to flip the operands.
4952
4953 // If LHS is a foldable load, but RHS is not, flip the condition.
4954 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4955 !ISD::isNON_EXTLoad(RHS.getNode())) {
4956 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4957 std::swap(LHS, RHS);
4958 }
4959
4960 switch (SetCCOpcode) {
4961 default: break;
4962 case ISD::SETOLT:
4963 case ISD::SETOLE:
4964 case ISD::SETUGT:
4965 case ISD::SETUGE:
4966 std::swap(LHS, RHS);
4967 break;
4968 }
4969
4970 // On a floating point condition, the flags are set as follows:
4971 // ZF PF CF op
4972 // 0 | 0 | 0 | X > Y
4973 // 0 | 0 | 1 | X < Y
4974 // 1 | 0 | 0 | X == Y
4975 // 1 | 1 | 1 | unordered
4976 switch (SetCCOpcode) {
4977 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4977)
;
4978 case ISD::SETUEQ:
4979 case ISD::SETEQ: return X86::COND_E;
4980 case ISD::SETOLT: // flipped
4981 case ISD::SETOGT:
4982 case ISD::SETGT: return X86::COND_A;
4983 case ISD::SETOLE: // flipped
4984 case ISD::SETOGE:
4985 case ISD::SETGE: return X86::COND_AE;
4986 case ISD::SETUGT: // flipped
4987 case ISD::SETULT:
4988 case ISD::SETLT: return X86::COND_B;
4989 case ISD::SETUGE: // flipped
4990 case ISD::SETULE:
4991 case ISD::SETLE: return X86::COND_BE;
4992 case ISD::SETONE:
4993 case ISD::SETNE: return X86::COND_NE;
4994 case ISD::SETUO: return X86::COND_P;
4995 case ISD::SETO: return X86::COND_NP;
4996 case ISD::SETOEQ:
4997 case ISD::SETUNE: return X86::COND_INVALID;
4998 }
4999}
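
The constant-RHS rewrites in the integer path above (X > -1 tested as sign-clear against 0, X < 1 turned into X <= 0) are ordinary arithmetic identities. A small self-contained check of those identities in plain C++, assuming nothing from LLVM (the helper names are made up for illustration):

#include <cassert>

// For a signed X, the rewrites used above hold as plain arithmetic facts:
//   X > -1  <=>  X >= 0   (sign bit clear, COND_NS against a zero RHS)
//   X <  1  <=>  X <= 0   (COND_LE against a zero RHS)
static bool gtMinusOne(int X) { return X >= 0; } // X > -1 rewritten
static bool ltOne(int X)      { return X <= 0; } // X < 1 rewritten

int main() {
  for (int X = -3; X <= 3; ++X) {
    assert((X > -1) == gtMinusOne(X));
    assert((X < 1) == ltOne(X));
  }
  return 0;
}
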
5000
5001/// Is there a floating point cmov for the specific X86 condition code?
5002/// The current x86 ISA includes the following FP cmov instructions:
5003/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5004static bool hasFPCMov(unsigned X86CC) {
5005 switch (X86CC) {
5006 default:
5007 return false;
5008 case X86::COND_B:
5009 case X86::COND_BE:
5010 case X86::COND_E:
5011 case X86::COND_P:
5012 case X86::COND_A:
5013 case X86::COND_AE:
5014 case X86::COND_NE:
5015 case X86::COND_NP:
5016 return true;
5017 }
5018}
5019
5020
5021bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5022 const CallInst &I,
5023 MachineFunction &MF,
5024 unsigned Intrinsic) const {
5025
5026 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5027 if (!IntrData)
5028 return false;
5029
5030 Info.flags = MachineMemOperand::MONone;
5031 Info.offset = 0;
5032
5033 switch (IntrData->Type) {
5034 case TRUNCATE_TO_MEM_VI8:
5035 case TRUNCATE_TO_MEM_VI16:
5036 case TRUNCATE_TO_MEM_VI32: {
5037 Info.opc = ISD::INTRINSIC_VOID;
5038 Info.ptrVal = I.getArgOperand(0);
5039 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5040 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5041 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5042 ScalarVT = MVT::i8;
5043 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5044 ScalarVT = MVT::i16;
5045 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5046 ScalarVT = MVT::i32;
5047
5048 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5049 Info.align = Align(1);
5050 Info.flags |= MachineMemOperand::MOStore;
5051 break;
5052 }
5053 case GATHER:
5054 case GATHER_AVX2: {
5055 Info.opc = ISD::INTRINSIC_W_CHAIN;
5056 Info.ptrVal = nullptr;
5057 MVT DataVT = MVT::getVT(I.getType());
5058 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5059 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5060 IndexVT.getVectorNumElements());
5061 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5062 Info.align = Align(1);
5063 Info.flags |= MachineMemOperand::MOLoad;
5064 break;
5065 }
5066 case SCATTER: {
5067 Info.opc = ISD::INTRINSIC_VOID;
5068 Info.ptrVal = nullptr;
5069 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5070 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5071 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5072 IndexVT.getVectorNumElements());
5073 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5074 Info.align = Align(1);
5075 Info.flags |= MachineMemOperand::MOStore;
5076 break;
5077 }
5078 default:
5079 return false;
5080 }
5081
5082 return true;
5083}
5084
5085/// Returns true if the target can instruction select the
5086/// specified FP immediate natively. If false, the legalizer will
5087/// materialize the FP immediate as a load from a constant pool.
5088bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5089 bool ForCodeSize) const {
5090 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
5091 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
5092 return true;
5093 }
5094 return false;
5095}
5096
5097bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5098 ISD::LoadExtType ExtTy,
5099 EVT NewVT) const {
5100 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
5101
5102 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5103 // relocation must target a movq or addq instruction: don't let the load shrink.
5104 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5105 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5106 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5107 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5108
5109 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5110 // those uses are extracted directly into a store, then the extract + store
5111 // can be store-folded. Therefore, it's probably not worth splitting the load.
5112 EVT VT = Load->getValueType(0);
5113 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5114 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5115 // Skip uses of the chain value. Result 0 of the node is the load value.
5116 if (UI.getUse().getResNo() != 0)
5117 continue;
5118
5119 // If this use is not an extract + store, it's probably worth splitting.
5120 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5121 UI->use_begin()->getOpcode() != ISD::STORE)
5122 return true;
5123 }
5124 // All non-chain uses are extract + store.
5125 return false;
5126 }
5127
5128 return true;
5129}
5130
5131/// Returns true if it is beneficial to convert a load of a constant
5132/// to just the constant itself.
5133bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5134 Type *Ty) const {
5135 assert(Ty->isIntegerTy());
5136
5137 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5138 if (BitSize == 0 || BitSize > 64)
5139 return false;
5140 return true;
5141}
5142
5143bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5144 // If we are using XMM registers in the ABI and the condition of the select is
5145 // a floating-point compare and we have blendv or conditional move, then it is
5146 // cheaper to select instead of doing a cross-register move and creating a
5147 // load that depends on the compare result.
5148 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5149 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5150}
5151
5152bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5153 // TODO: It might be a win to ease or lift this restriction, but the generic
5154 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5155 if (VT.isVector() && Subtarget.hasAVX512())
5156 return false;
5157
5158 return true;
5159}
5160
5161bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5162 SDValue C) const {
5163 // TODO: We handle scalars using custom code, but generic combining could make
5164 // that unnecessary.
5165 APInt MulC;
5166 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5167 return false;
5168
5169 // Find the type this will be legalized to. Otherwise we might prematurely
5170 // convert this to shl+add/sub and then still have to type legalize those ops.
5171 // Another choice would be to defer the decision for illegal types until
5172 // after type legalization. But constant splat vectors of i64 can't make it
5173 // through type legalization on 32-bit targets so we would need to special
5174 // case vXi64.
5175 while (getTypeAction(Context, VT) != TypeLegal)
5176 VT = getTypeToTransformTo(Context, VT);
5177
5178 // If vector multiply is legal, assume that's faster than shl + add/sub.
5179 // TODO: Multiply is a complex op with higher latency and lower throughput in
5180 // most implementations, so this check could be loosened based on type
5181 // and/or a CPU attribute.
5182 if (isOperationLegal(ISD::MUL, VT))
5183 return false;
5184
5185 // shl+add, shl+sub, shl+add+neg
5186 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5187 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5188}
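
The power-of-two tests at the end express when a constant multiply can be rebuilt from one shift and one add/sub. A standalone sketch of the scalar idea (plain C++; the helper names are invented for illustration, and unsigned arithmetic keeps every shift well defined):

#include <cassert>
#include <cstdint>

static bool isPow2(uint64_t V) { return V && !(V & (V - 1)); }
static unsigned log2u(uint64_t V) { unsigned N = 0; while (V >>= 1) ++N; return N; }

// X * C rebuilt as shl + add/sub when C sits next to a power of two.
// Unsigned wrap-around means the identity still holds modulo 2^64.
static uint64_t mulByConstSketch(uint64_t X, uint64_t C) {
  if (isPow2(C - 1))                      // C = 2^k + 1  ->  (X << k) + X
    return (X << log2u(C - 1)) + X;
  if (isPow2(C + 1))                      // C = 2^k - 1  ->  (X << k) - X
    return (X << log2u(C + 1)) - X;
  return X * C;                           // otherwise keep the multiply
}

int main() {
  for (uint64_t X = 0; X <= 8; ++X) {
    assert(mulByConstSketch(X, 9) == X * 9);   // 9 = 8 + 1
    assert(mulByConstSketch(X, 7) == X * 7);   // 7 = 8 - 1
    assert(mulByConstSketch(X, 10) == X * 10); // no adjacent power of two
  }
  return 0;
}
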
5189
5190bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5191 unsigned Index) const {
5192 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5193 return false;
5194
5195 // Mask vectors support all subregister combinations and operations that
5196 // extract half of vector.
5197 if (ResVT.getVectorElementType() == MVT::i1)
5198 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5199 (Index == ResVT.getVectorNumElements()));
5200
5201 return (Index % ResVT.getVectorNumElements()) == 0;
5202}
5203
5204bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5205 unsigned Opc = VecOp.getOpcode();
5206
5207 // Assume target opcodes can't be scalarized.
5208 // TODO - do we have any exceptions?
5209 if (Opc >= ISD::BUILTIN_OP_END)
5210 return false;
5211
5212 // If the vector op is not supported, try to convert to scalar.
5213 EVT VecVT = VecOp.getValueType();
5214 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5215 return true;
5216
5217 // If the vector op is supported, but the scalar op is not, the transform may
5218 // not be worthwhile.
5219 EVT ScalarVT = VecVT.getScalarType();
5220 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5221}
5222
5223bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5224 bool) const {
5225 // TODO: Allow vectors?
5226 if (VT.isVector())
5227 return false;
5228 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5229}
5230
5231bool X86TargetLowering::isCheapToSpeculateCttz() const {
5232 // Speculate cttz only if we can directly use TZCNT.
5233 return Subtarget.hasBMI();
5234}
5235
5236bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5237 // Speculate ctlz only if we can directly use LZCNT.
5238 return Subtarget.hasLZCNT();
5239}
5240
5241bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5242 const SelectionDAG &DAG,
5243 const MachineMemOperand &MMO) const {
5244 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5245 BitcastVT.getVectorElementType() == MVT::i1)
5246 return false;
5247
5248 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5249 return false;
5250
5251 // If both types are legal vectors, it's always ok to convert them.
5252 if (LoadVT.isVector() && BitcastVT.isVector() &&
5253 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5254 return true;
5255
5256 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5257}
5258
5259bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5260 const SelectionDAG &DAG) const {
5261 // Do not merge to float value size (128 bytes) if no implicit
5262 // float attribute is set.
5263 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
5264 Attribute::NoImplicitFloat);
5265
5266 if (NoFloat) {
5267 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5268 return (MemVT.getSizeInBits() <= MaxIntSize);
5269 }
5270 // Make sure we don't merge greater than our preferred vector
5271 // width.
5272 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5273 return false;
5274 return true;
5275}
5276
5277bool X86TargetLowering::isCtlzFast() const {
5278 return Subtarget.hasFastLZCNT();
5279}
5280
5281bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5282 const Instruction &AndI) const {
5283 return true;
5284}
5285
5286bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5287 EVT VT = Y.getValueType();
5288
5289 if (VT.isVector())
5290 return false;
5291
5292 if (!Subtarget.hasBMI())
5293 return false;
5294
5295 // There are only 32-bit and 64-bit forms for 'andn'.
5296 if (VT != MVT::i32 && VT != MVT::i64)
5297 return false;
5298
5299 return !isa<ConstantSDNode>(Y);
5300}
5301
5302bool X86TargetLowering::hasAndNot(SDValue Y) const {
5303 EVT VT = Y.getValueType();
5304
5305 if (!VT.isVector())
5306 return hasAndNotCompare(Y);
5307
5308 // Vector.
5309
5310 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5311 return false;
5312
5313 if (VT == MVT::v4i32)
5314 return true;
5315
5316 return Subtarget.hasSSE2();
5317}
5318
5319bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5320 return X.getValueType().isScalarInteger(); // 'bt'
5321}
5322
5323bool X86TargetLowering::
5324 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5325 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5326 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5327 SelectionDAG &DAG) const {
5328 // Does the baseline recommend not performing the fold by default?
5329 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5330 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5331 return false;
5332 // For scalars this transform is always beneficial.
5333 if (X.getValueType().isScalarInteger())
5334 return true;
5335 // If all the shift amounts are identical, then the transform is beneficial even
5336 // with rudimentary SSE2 shifts.
5337 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5338 return true;
5339 // If we have AVX2 with its powerful shift operations, then it's also good.
5340 if (Subtarget.hasAVX2())
5341 return true;
5342 // Pre-AVX2 vector codegen for this pattern is best for the variant with 'shl'.
5343 return NewShiftOpcode == ISD::SHL;
5344}
5345
5346bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5347 const SDNode *N, CombineLevel Level) const {
5348 assert(((N->getOpcode() == ISD::SHL &&
5349 N->getOperand(0).getOpcode() == ISD::SRL) ||
5350 (N->getOpcode() == ISD::SRL &&
5351 N->getOperand(0).getOpcode() == ISD::SHL)) &&
5352 "Expected shift-shift mask");
5353 EVT VT = N->getValueType(0);
5354 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5355 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5356 // Only fold if the shift values are equal - so it folds to AND.
5357 // TODO - we should fold if either is a non-uniform vector but we don't do
5358 // the fold for non-splats yet.
5359 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5360 }
5361 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5362}
5363
5364bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5365 EVT VT = Y.getValueType();
5366
5367 // For vectors, we don't have a preference, but we probably want a mask.
5368 if (VT.isVector())
5369 return false;
5370
5371 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5372 if (VT == MVT::i64 && !Subtarget.is64Bit())
5373 return false;
5374
5375 return true;
5376}
5377
5378bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5379 SDNode *N) const {
5380 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5381 !Subtarget.isOSWindows())
5382 return false;
5383 return true;
5384}
5385
5386bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5387 // Any legal vector type can be splatted more efficiently than
5388 // loading/spilling from memory.
5389 return isTypeLegal(VT);
5390}
5391
5392MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5393 MVT VT = MVT::getIntegerVT(NumBits);
5394 if (isTypeLegal(VT))
5395 return VT;
5396
5397 // PMOVMSKB can handle this.
5398 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5399 return MVT::v16i8;
5400
5401 // VPMOVMSKB can handle this.
5402 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5403 return MVT::v32i8;
5404
5405 // TODO: Allow 64-bit type for 32-bit target.
5406 // TODO: 512-bit types should be allowed, but make sure that those
5407 // cases are handled in combineVectorSizedSetCCEquality().
5408
5409 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5410}
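
The v16i8/v32i8 results here feed vector-sized equality compares, where a byte-wise compare is reduced through a movemask. The sketch below only illustrates that reduction shape in plain C++ with a 16-bit mask; it is not the actual lowering and uses no intrinsics:

#include <cassert>
#include <cstdint>
#include <cstring>

// Compare two 16-byte blocks by building a 16-bit "equality mask"
// (one bit per byte, like PMOVMSKB on a PCMPEQB result) and checking
// that every bit is set.
static bool equal16(const uint8_t *A, const uint8_t *B) {
  uint32_t Mask = 0;
  for (unsigned i = 0; i != 16; ++i)
    Mask |= uint32_t(A[i] == B[i]) << i;
  return Mask == 0xFFFF;
}

int main() {
  uint8_t X[16], Y[16];
  std::memset(X, 0xAB, sizeof(X));
  std::memcpy(Y, X, sizeof(Y));
  assert(equal16(X, Y));
  Y[7] ^= 1;
  assert(!equal16(X, Y));
  return 0;
}
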
5411
5412/// Val is the undef sentinel value or equal to the specified value.
5413static bool isUndefOrEqual(int Val, int CmpVal) {
5414 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5415}
5416
5417/// Val is either the undef or zero sentinel value.
5418static bool isUndefOrZero(int Val) {
5419 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5420}
5421
5422/// Return true if every element in Mask, beginning from position Pos and ending
5423/// in Pos+Size is the undef sentinel value.
5424static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5425 return llvm::all_of(Mask.slice(Pos, Size),
5426 [](int M) { return M == SM_SentinelUndef; });
5427}
5428
5429/// Return true if the mask creates a vector whose lower half is undefined.
5430static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5431 unsigned NumElts = Mask.size();
5432 return isUndefInRange(Mask, 0, NumElts / 2);
5433}
5434
5435/// Return true if the mask creates a vector whose upper half is undefined.
5436static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5437 unsigned NumElts = Mask.size();
5438 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5439}
5440
5441/// Return true if Val falls within the specified half-open range [Low, Hi).
5442static bool isInRange(int Val, int Low, int Hi) {
5443 return (Val >= Low && Val < Hi);
5444}
5445
5446/// Return true if the value of any element in Mask falls within the specified
5447/// range [Low, Hi).
5448static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5449 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5450}
5451
5452/// Return true if the value of any element in Mask is the zero sentinel value.
5453static bool isAnyZero(ArrayRef<int> Mask) {
5454 return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
5455}
5456
5457/// Return true if the value of any element in Mask is the zero or undef
5458/// sentinel values.
5459static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
5460 return llvm::any_of(Mask, [](int M) {
5461 return M == SM_SentinelZero || M == SM_SentinelUndef;
5462 });
5463}
5464
5465/// Return true if Val is undef or if its value falls within the
5466/// specified range [Low, Hi).
5467static bool isUndefOrInRange(int Val, int Low, int Hi) {
5468 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5469}
5470
5471/// Return true if every element in Mask is undef or if its value
5472/// falls within the specified range [Low, Hi).
5473static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5474 return llvm::all_of(
5475 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5476}
5477
5478/// Return true if Val is undef, zero or if its value falls within the
5479/// specified range [Low, Hi).
5480static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5481 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5482}
5483
5484/// Return true if every element in Mask is undef, zero or if its value
5485/// falls within the specified range [Low, Hi).
5486static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5487 return llvm::all_of(
5488 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5489}
5490
5491/// Return true if every element in Mask, beginning
5492/// from position Pos and ending in Pos + Size, falls within the specified
5493/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5494static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5495 unsigned Size, int Low, int Step = 1) {
5496 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5497 if (!isUndefOrEqual(Mask[i], Low))
5498 return false;
5499 return true;
5500}
5501
5502/// Return true if every element in Mask, beginning
5503/// from position Pos and ending in Pos+Size, falls within the specified
5504/// sequential range [Low, Low+Size), or is undef or is zero.
5505static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5506 unsigned Size, int Low,
5507 int Step = 1) {
5508 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5509 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5510 return false;
5511 return true;
5512}
5513
5514/// Return true if every element in Mask, beginning
5515/// from position Pos and ending in Pos+Size is undef or is zero.
5516static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5517 unsigned Size) {
5518 return llvm::all_of(Mask.slice(Pos, Size),
5519 [](int M) { return isUndefOrZero(M); });
5520}
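
These predicates treat -1 (SM_SentinelUndef) and -2 (SM_SentinelZero) as wildcard lanes. A self-contained sketch of the "sequential or undef" test on a sample mask, with the undef sentinel redefined locally for illustration (not the real helper):

#include <cassert>
#include <vector>

constexpr int kUndef = -1; // stands in for SM_SentinelUndef

// A mask slice [Pos, Pos+Size) is "sequential or undef" if every element
// is either undef or exactly Low, Low+Step, Low+2*Step, ...
static bool sequentialOrUndef(const std::vector<int> &Mask, unsigned Pos,
                              unsigned Size, int Low, int Step = 1) {
  for (unsigned i = Pos; i != Pos + Size; ++i, Low += Step)
    if (Mask[i] != kUndef && Mask[i] != Low)
      return false;
  return true;
}

int main() {
  // <0, u, 2, 3> matches the identity sequence 0,1,2,3.
  assert(sequentialOrUndef({0, kUndef, 2, 3}, 0, 4, 0));
  // <0, 2, 2, 3> does not (element 1 should be 1).
  assert(!sequentialOrUndef({0, 2, 2, 3}, 0, 4, 0));
  return 0;
}
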
5521
5522/// Helper function to test whether a shuffle mask could be
5523/// simplified by widening the elements being shuffled.
5524///
5525/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5526/// leaves it in an unspecified state.
5527///
5528/// NOTE: This must handle normal vector shuffle masks and *target* vector
5529/// shuffle masks. The latter have the special property of a '-2' representing
5530/// a zero-ed lane of a vector.
5531static bool canWidenShuffleElements(ArrayRef<int> Mask,
5532 SmallVectorImpl<int> &WidenedMask) {
5533 WidenedMask.assign(Mask.size() / 2, 0);
5534 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5535 int M0 = Mask[i];
5536 int M1 = Mask[i + 1];
5537
5538 // If both elements are undef, it's trivial.
5539 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5540 WidenedMask[i / 2] = SM_SentinelUndef;
5541 continue;
5542 }
5543
5544 // Check for an undef mask and a mask value properly aligned to fit with
5545 // a pair of values. If we find such a case, use the non-undef mask's value.
5546 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5547 WidenedMask[i / 2] = M1 / 2;
5548 continue;
5549 }
5550 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5551 WidenedMask[i / 2] = M0 / 2;
5552 continue;
5553 }
5554
5555 // When zeroing, we need to spread the zeroing across both lanes to widen.
5556 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5557 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5558 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5559 WidenedMask[i / 2] = SM_SentinelZero;
5560 continue;
5561 }
5562 return false;
5563 }
5564
5565 // Finally check if the two mask values are adjacent and aligned with
5566 // a pair.
5567 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5568 WidenedMask[i / 2] = M0 / 2;
5569 continue;
5570 }
5571
5572 // Otherwise we can't safely widen the elements used in this shuffle.
5573 return false;
5574 }
5575 assert(WidenedMask.size() == Mask.size() / 2 &&
5576 "Incorrect size of mask after widening the elements!");
5577
5578 return true;
5579}
5580
5581static bool canWidenShuffleElements(ArrayRef<int> Mask,
5582 const APInt &Zeroable,
5583 bool V2IsZero,
5584 SmallVectorImpl<int> &WidenedMask) {
5585 // Create an alternative mask with info about zeroable elements.
5586 // Here we do not set undef elements as zeroable.
5587 SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
5588 if (V2IsZero) {
5589 assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
5590 for (int i = 0, Size = Mask.size(); i != Size; ++i)
5591 if (Mask[i] != SM_SentinelUndef && Zeroable[i])
5592 ZeroableMask[i] = SM_SentinelZero;
5593 }
5594 return canWidenShuffleElements(ZeroableMask, WidenedMask);
5595}
5596
5597static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5598 SmallVector<int, 32> WidenedMask;
5599 return canWidenShuffleElements(Mask, WidenedMask);
5600}
5601
5602// Attempt to narrow/widen shuffle mask until it matches the target number of
5603// elements.
5604static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
5605 SmallVectorImpl<int> &ScaledMask) {
5606 unsigned NumSrcElts = Mask.size();
5607 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
5608 "Illegal shuffle scale factor");
5609
5610 // Narrowing is guaranteed to work.
5611 if (NumDstElts >= NumSrcElts) {
5612 int Scale = NumDstElts / NumSrcElts;
5613 llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask);
5614 return true;
5615 }
5616
5617 // We have to repeat the widening until we reach the target size, but we can
5618 // split out the first widening as it sets up ScaledMask for us.
5619 if (canWidenShuffleElements(Mask, ScaledMask)) {
5620 while (ScaledMask.size() > NumDstElts) {
5621 SmallVector<int, 16> WidenedMask;
5622 if (!canWidenShuffleElements(ScaledMask, WidenedMask))
5623 return false;
5624 ScaledMask = std::move(WidenedMask);
5625 }
5626 return true;
5627 }
5628
5629 return false;
5630}
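
The widening rules are easiest to follow on a concrete mask. A minimal sketch of the pairing logic in plain C++ (sentinels redefined locally; it only covers the cases handled above and is not a drop-in replacement):

#include <cassert>
#include <vector>

constexpr int kUndef = -1, kZero = -2; // SM_SentinelUndef / SM_SentinelZero

// Try to describe each pair of narrow lanes with one wide lane.
static bool widenMaskSketch(const std::vector<int> &Mask,
                            std::vector<int> &Wide) {
  Wide.assign(Mask.size() / 2, 0);
  for (size_t i = 0; i < Mask.size(); i += 2) {
    int M0 = Mask[i], M1 = Mask[i + 1];
    if (M0 == kUndef && M1 == kUndef) { Wide[i / 2] = kUndef; continue; }
    if (M0 == kUndef && M1 >= 0 && (M1 % 2) == 1) { Wide[i / 2] = M1 / 2; continue; }
    if (M1 == kUndef && M0 >= 0 && (M0 % 2) == 0) { Wide[i / 2] = M0 / 2; continue; }
    if (M0 == kZero || M1 == kZero) {
      if (M0 < 0 && M1 < 0) { Wide[i / 2] = kZero; continue; } // both zero/undef
      return false;
    }
    if (M0 >= 0 && (M0 % 2) == 0 && M1 == M0 + 1) { Wide[i / 2] = M0 / 2; continue; }
    return false; // pair straddles wide lanes
  }
  return true;
}

int main() {
  std::vector<int> Wide;
  assert(widenMaskSketch({0, 1, 6, 7}, Wide) && Wide == std::vector<int>({0, 3}));
  assert(widenMaskSketch({kUndef, kUndef, 2, 3}, Wide) &&
         Wide == std::vector<int>({kUndef, 1}));
  assert(!widenMaskSketch({1, 2, 4, 5}, Wide)); // pair <1,2> straddles lanes
  return 0;
}
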
5631
5632/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5633bool X86::isZeroNode(SDValue Elt) {
5634 return isNullConstant(Elt) || isNullFPConstant(Elt);
5635}
5636
5637// Build a vector of constants.
5638// Use an UNDEF node if MaskElt == -1.
5639// Split 64-bit constants in the 32-bit mode.
5640static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5641 const SDLoc &dl, bool IsMask = false) {
5642
5643 SmallVector<SDValue, 32> Ops;
5644 bool Split = false;
5645
5646 MVT ConstVecVT = VT;
5647 unsigned NumElts = VT.getVectorNumElements();
5648 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5649 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5650 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5651 Split = true;
5652 }
5653
5654 MVT EltVT = ConstVecVT.getVectorElementType();
5655 for (unsigned i = 0; i < NumElts; ++i) {
5656 bool IsUndef = Values[i] < 0 && IsMask;
5657 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5658 DAG.getConstant(Values[i], dl, EltVT);
5659 Ops.push_back(OpNode);
5660 if (Split)
5661 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5662 DAG.getConstant(0, dl, EltVT));
5663 }
5664 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5665 if (Split)
5666 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5667 return ConstsNode;
5668}
5669
5670static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5671 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5672 assert(Bits.size() == Undefs.getBitWidth() &&
5673 "Unequal constant and undef arrays");
5674 SmallVector<SDValue, 32> Ops;
5675 bool Split = false;
5676
5677 MVT ConstVecVT = VT;
5678 unsigned NumElts = VT.getVectorNumElements();
5679 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5680 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5681 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5682 Split = true;
5683 }
5684
5685 MVT EltVT = ConstVecVT.getVectorElementType();
5686 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5687 if (Undefs[i]) {
5688 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5689 continue;
5690 }
5691 const APInt &V = Bits[i];
5692 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
5693 if (Split) {
5694 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5695 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5696 } else if (EltVT == MVT::f32) {
5697 APFloat FV(APFloat::IEEEsingle(), V);
5698 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5699 } else if (EltVT == MVT::f64) {
5700 APFloat FV(APFloat::IEEEdouble(), V);
5701 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5702 } else {
5703 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5704 }
5705 }
5706
5707 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5708 return DAG.getBitcast(VT, ConstsNode);
5709}
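
When i64 is not legal (32-bit mode), each 64-bit constant above is emitted as two 32-bit halves, low half first, and the result is bitcast back to the requested type. A standalone sketch of that split with no SelectionDAG involved (it assumes a little-endian element layout, as on x86):

#include <cassert>
#include <cstdint>
#include <vector>

// Split each 64-bit value into (low 32 bits, high 32 bits), mirroring the
// bitcast of a 64-bit-element constant vector to twice as many i32 elements.
static std::vector<uint32_t> splitTo32(const std::vector<uint64_t> &Vals) {
  std::vector<uint32_t> Out;
  for (uint64_t V : Vals) {
    Out.push_back(uint32_t(V));        // low half
    Out.push_back(uint32_t(V >> 32));  // high half
  }
  return Out;
}

int main() {
  auto Out = splitTo32({0x0000000100000002ULL});
  assert(Out.size() == 2 && Out[0] == 0x2u && Out[1] == 0x1u);
  return 0;
}
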
5710
5711/// Returns a vector of specified type with all zero elements.
5712static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5713 SelectionDAG &DAG, const SDLoc &dl) {
5714 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
5715 VT.getVectorElementType() == MVT::i1) &&
5716 "Unexpected vector type");
5717
5718 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5719 // type. This ensures they get CSE'd. But if the integer type is not
5720 // available, use a floating-point +0.0 instead.
5721 SDValue Vec;
5722 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5723 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5724 } else if (VT.isFloatingPoint()) {
5725 Vec = DAG.getConstantFP(+0.0, dl, VT);
5726 } else if (VT.getVectorElementType() == MVT::i1) {
5727 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
5728 "Unexpected vector type");
5729 Vec = DAG.getConstant(0, dl, VT);
5730 } else {
5731 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5732 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5733 }
5734 return DAG.getBitcast(VT, Vec);
5735}
5736
5737static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5738 const SDLoc &dl, unsigned vectorWidth) {
5739 EVT VT = Vec.getValueType();
5740 EVT ElVT = VT.getVectorElementType();
5741 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5742 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5743 VT.getVectorNumElements()/Factor);
5744
5745 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5746 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5747 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5748
5749 // This is the index of the first element of the vectorWidth-bit chunk
5750 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5751 IdxVal &= ~(ElemsPerChunk - 1);
5752
5753 // If the input is a buildvector just emit a smaller one.
5754 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5755 return DAG.getBuildVector(ResultVT, dl,
5756 Vec->ops().slice(IdxVal, ElemsPerChunk));
5757
5758 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5759 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5760}
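
The only subtle step above is rounding the element index down to the start of its chunk by clearing the low bits, which is valid because ElemsPerChunk is a power of two. A tiny plain-C++ sketch of that computation (names are illustrative):

#include <cassert>

// Round an element index down to the first element of its chunk.
// ElemsPerChunk must be a power of two, so clearing the low bits suffices.
static unsigned alignToChunk(unsigned IdxVal, unsigned ElemsPerChunk) {
  return IdxVal & ~(ElemsPerChunk - 1);
}

int main() {
  // v8f32 extracted in 128-bit (4 x f32) chunks:
  assert(alignToChunk(5, 4) == 4); // element 5 lives in the upper chunk
  assert(alignToChunk(3, 4) == 0); // element 3 lives in the lower chunk
  return 0;
}
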
5761
5762/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5763/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5764/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5765/// instructions or a simple subregister reference. Idx is an index in the
5766/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5767/// lowering EXTRACT_VECTOR_ELT operations easier.
5768static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5769 SelectionDAG &DAG, const SDLoc &dl) {
5770 assert((Vec.getValueType().is256BitVector() ||
5771 Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
5772 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5773}
5774
5775/// Generate a DAG to grab 256-bits from a 512-bit vector.
5776static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5777 SelectionDAG &DAG, const SDLoc &dl) {
5778 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
5779 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5780}
5781
5782static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5783 SelectionDAG &DAG, const SDLoc &dl,
5784 unsigned vectorWidth) {
5785 assert((vectorWidth == 128 || vectorWidth == 256) &&
5786 "Unsupported vector width");
5787 // Inserting UNDEF just yields Result.
5788 if (Vec.isUndef())
5789 return Result;
5790 EVT VT = Vec.getValueType();
5791 EVT ElVT = VT.getVectorElementType();
5792 EVT ResultVT = Result.getValueType();
5793
5794 // Insert the relevant vectorWidth bits.
5795 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5796 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5797
5798 // This is the index of the first element of the vectorWidth-bit chunk
5799 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5800 IdxVal &= ~(ElemsPerChunk - 1);
5801
5802 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5803 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5804}
5805
5806/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5807/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5808/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5809/// simple superregister reference. Idx is an index in the 128 bits
5810/// we want. It need not be aligned to a 128-bit boundary. That makes
5811/// lowering INSERT_VECTOR_ELT operations easier.
5812static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5813 SelectionDAG &DAG, const SDLoc &dl) {
5814 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
5815 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5816}
5817
5818/// Widen a vector to a larger size with the same scalar type, with the new
5819/// elements either zero or undef.
5820static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5821 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5822 const SDLoc &dl) {
5823 assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&
5824 Vec.getValueType().getScalarType() == VT.getScalarType() &&
5825 "Unsupported vector widening type");
5826 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5827 : DAG.getUNDEF(VT);
5828 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5829 DAG.getIntPtrConstant(0, dl));
5830}
5831
5832/// Widen a vector to a larger size with the same scalar type, with the new
5833/// elements either zero or undef.
5834static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
5835 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5836 const SDLoc &dl, unsigned WideSizeInBits) {
5837 assert(Vec.getValueSizeInBits() < WideSizeInBits &&
5838 (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&
5839 "Unsupported vector widening type");
5840 unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits();
5841 MVT SVT = Vec.getSimpleValueType().getScalarType();
5842 MVT VT = MVT::getVectorVT(SVT, WideNumElts);
5843 return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
5844}
5845
5846// Helper function to collect subvector ops that are concatenated together,
5847 // either by ISD::CONCAT_VECTORS or an ISD::INSERT_SUBVECTOR series.
5848// The subvectors in Ops are guaranteed to be the same type.
5849static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
5850 assert(Ops.empty() && "Expected an empty ops vector");
5851
5852 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
5853 Ops.append(N->op_begin(), N->op_end());
5854 return true;
5855 }
5856
5857 if (N->getOpcode() == ISD::INSERT_SUBVECTOR) {
5858 SDValue Src = N->getOperand(0);
5859 SDValue Sub = N->getOperand(1);
5860 const APInt &Idx = N->getConstantOperandAPInt(2);
5861 EVT VT = Src.getValueType();
5862 EVT SubVT = Sub.getValueType();
5863
5864 // TODO - Handle more general insert_subvector chains.
5865 if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
5866 Idx == (VT.getVectorNumElements() / 2)) {
5867 // insert_subvector(insert_subvector(undef, x, lo), y, hi)
5868 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
5869 Src.getOperand(1).getValueType() == SubVT &&
5870 isNullConstant(Src.getOperand(2))) {
5871 Ops.push_back(Src.getOperand(1));
5872 Ops.push_back(Sub);
5873 return true;
5874 }
5875 // insert_subvector(x, extract_subvector(x, lo), hi)
5876 if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5877 Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) {
5878 Ops.append(2, Sub);
5879 return true;
5880 }
5881 }
5882 }
5883
5884 return false;
5885}
5886
5887static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
5888 const SDLoc &dl) {
5889 EVT VT = Op.getValueType();
5890 unsigned NumElems = VT.getVectorNumElements();
5891 unsigned SizeInBits = VT.getSizeInBits();
5892 assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 &&
5893 "Can't split