Bug Summary

File: llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 14996, column 21
The result of the '/' expression is undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem 
/usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-11-115256-23437-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp

/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp

<
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "Utils/X86ShuffleDecode.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86InstrBuilder.h"
19#include "X86IntrinsicsInfo.h"
20#include "X86MachineFunctionInfo.h"
21#include "X86TargetMachine.h"
22#include "X86TargetObjectFile.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringExtras.h"
27#include "llvm/ADT/StringSwitch.h"
28#include "llvm/Analysis/BlockFrequencyInfo.h"
29#include "llvm/Analysis/EHPersonalities.h"
30#include "llvm/Analysis/ProfileSummaryInfo.h"
31#include "llvm/CodeGen/IntrinsicLowering.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineJumpTableInfo.h"
36#include "llvm/CodeGen/MachineModuleInfo.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/WinEHFuncInfo.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/DiagnosticInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalAlias.h"
47#include "llvm/IR/GlobalVariable.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Intrinsics.h"
50#include "llvm/MC/MCAsmInfo.h"
51#include "llvm/MC/MCContext.h"
52#include "llvm/MC/MCExpr.h"
53#include "llvm/MC/MCSymbol.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/MathExtras.h"
59#include "llvm/Target/TargetOptions.h"
60#include <algorithm>
61#include <bitset>
62#include <cctype>
63#include <numeric>
64using namespace llvm;
65
66#define DEBUG_TYPE "x86-isel"
67
68STATISTIC(NumTailCalls, "Number of tail calls"); // expands to: static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls", "Number of tail calls"};
69
// Hidden integer command-line option, initialised to 4 via cl::init. Per its
// own description string the value is read as a log2 byte count.
// NOTE(review): the adjacent string literals below are concatenated with no
// space between "(as log2 bytes)" and "(the last ...", so the help text runs
// the two parenthesised phrases together.
70static cl::opt<int> ExperimentalPrefLoopAlignment(
71 "x86-experimental-pref-loop-alignment", cl::init(4),
72 cl::desc(
73 "Sets the preferable loop alignment for experiments (as log2 bytes)"
74 "(the last x86-experimental-pref-loop-alignment bits"
75 " of the loop header PC will be 0)."),
76 cl::Hidden);
77
78// Added in 10.0.
// Hidden boolean command-line option, initialised to false. Per its
// description, enabling it passes v32i16 and v64i8 in two YMM registers rather
// than one ZMM register on targets with AVX512F but without AVX512BW.
79static cl::opt<bool> EnableOldKNLABI(
80 "x86-enable-old-knl-abi", cl::init(false),
81 cl::desc("Enables passing v32i16 and v64i8 in 2 YMM registers instead of "
82 "one ZMM register on AVX512F, but not AVX512BW targets."),
83 cl::Hidden);
84
// Hidden boolean command-line option, initialised to true. Per its
// description it controls replacing 'mul x, Const' with cheaper instruction
// sequences such as shifts and LEA.
85static cl::opt<bool> MulConstantOptimization(
86 "mul-constant-optimization", cl::init(true),
87 cl::desc("Replace 'mul x, Const' with more effective instructions like "
88 "SHIFT, LEA, etc."),
89 cl::Hidden);
90
// Hidden boolean command-line option, initialised to false. Per its
// description, enabling it selects LoadSDNode/StoreSDNode instead of
// AtomicSDNode for unordered atomic loads and stores.
91static cl::opt<bool> ExperimentalUnorderedISEL(
92 "x86-experimental-unordered-atomic-isel", cl::init(false),
93 cl::desc("Use LoadSDNode and StoreSDNode instead of "
94 "AtomicSDNode for unordered atomic loads and "
95 "stores respectively."),
96 cl::Hidden);
97
98/// Call this when the user attempts to do something unsupported, like
99/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
100/// report_fatal_error, so calling code should attempt to recover without
101/// crashing.
///
/// \param DAG  Supplies the current machine function and the context whose
///             diagnose() receives the diagnostic.
/// \param dl   Its debug location is attached to the diagnostic.
/// \param Msg  Message text forwarded to DiagnosticInfoUnsupported.
102static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
103 const char *Msg) {
104 MachineFunction &MF = DAG.getMachineFunction();
// Report through the context's diagnostic handler, tagged with the IR
// function that MF wraps.
105 DAG.getContext()->diagnose(
106 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
107}
108
109X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
110 const X86Subtarget &STI)
111 : TargetLowering(TM), Subtarget(STI) {
112 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
113 X86ScalarSSEf64 = Subtarget.hasSSE2();
114 X86ScalarSSEf32 = Subtarget.hasSSE1();
115 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
116
117 // Set up the TargetLowering object.
118
119 // X86 is weird. It always uses i8 for shift amounts and setcc results.
120 setBooleanContents(ZeroOrOneBooleanContent);
121 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
122 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
123
124 // For 64-bit, since we have so many registers, use the ILP scheduler.
125 // For 32-bit, use the register pressure specific scheduling.
126 // For Atom, always use ILP scheduling.
127 if (Subtarget.isAtom())
128 setSchedulingPreference(Sched::ILP);
129 else if (Subtarget.is64Bit())
130 setSchedulingPreference(Sched::ILP);
131 else
132 setSchedulingPreference(Sched::RegPressure);
133 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
134 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
135
136 // Bypass expensive divides and use cheaper ones.
137 if (TM.getOptLevel() >= CodeGenOpt::Default) {
138 if (Subtarget.hasSlowDivide32())
139 addBypassSlowDiv(32, 8);
140 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
141 addBypassSlowDiv(64, 32);
142 }
143
144 if (Subtarget.isTargetWindowsMSVC() ||
145 Subtarget.isTargetWindowsItanium()) {
146 // Setup Windows compiler runtime calls.
147 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
148 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
149 setLibcallName(RTLIB::SREM_I64, "_allrem");
150 setLibcallName(RTLIB::UREM_I64, "_aullrem");
151 setLibcallName(RTLIB::MUL_I64, "_allmul");
152 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
153 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
154 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
155 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
156 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
157 }
158
159 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
160 // MSVCRT doesn't have powi; fall back to pow
161 setLibcallName(RTLIB::POWI_F32, nullptr);
162 setLibcallName(RTLIB::POWI_F64, nullptr);
163 }
164
165 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
166 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
167 // FIXME: Should we be limiting the atomic size on other configs? Default is
168 // 1024.
169 if (!Subtarget.hasCmpxchg8b())
170 setMaxAtomicSizeInBitsSupported(32);
171
172 // Set up the register classes.
173 addRegisterClass(MVT::i8, &X86::GR8RegClass);
174 addRegisterClass(MVT::i16, &X86::GR16RegClass);
175 addRegisterClass(MVT::i32, &X86::GR32RegClass);
176 if (Subtarget.is64Bit())
177 addRegisterClass(MVT::i64, &X86::GR64RegClass);
178
179 for (MVT VT : MVT::integer_valuetypes())
180 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
181
182 // We don't accept any truncstore of integer registers.
183 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
184 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
185 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
186 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
187 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
188 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
189
190 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
191
192 // SETOEQ and SETUNE require checking two conditions.
193 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
194 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
195 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
196 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
197 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
198 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
199
200 // Integer absolute.
201 if (Subtarget.hasCMov()) {
202 setOperationAction(ISD::ABS , MVT::i16 , Custom);
203 setOperationAction(ISD::ABS , MVT::i32 , Custom);
204 }
205 setOperationAction(ISD::ABS , MVT::i64 , Custom);
206
207 // Funnel shifts.
208 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
209 setOperationAction(ShiftOp , MVT::i16 , Custom);
210 setOperationAction(ShiftOp , MVT::i32 , Custom);
211 if (Subtarget.is64Bit())
212 setOperationAction(ShiftOp , MVT::i64 , Custom);
213 }
214
215 if (!Subtarget.useSoftFloat()) {
216 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
217 // operation.
218 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
219 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
220 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
221 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
222 // We have an algorithm for SSE2, and we turn this into a 64-bit
223 // FILD or VCVTUSI2SS/SD for other targets.
224 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
225 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
226 // We have an algorithm for SSE2->double, and we turn this into a
227 // 64-bit FILD followed by conditional FADD for other targets.
228 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
229 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
230
231 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
232 // this operation.
233 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
234 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
235 // SSE has no i16 to fp conversion, only i32. We promote in the handler
236 // to allow f80 to use i16 and f64 to use i16 with sse1 only
237 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
238 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
239 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
240 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
241 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
242 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
243 // are Legal, f80 is custom lowered.
244 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
245 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
246
247 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
248 // this operation.
249 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
250 // FIXME: This doesn't generate invalid exception when it should. PR44019.
251 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
252 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
253 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
254 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
255 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
256 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
257 // are Legal, f80 is custom lowered.
258 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
259 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
260
261 // Handle FP_TO_UINT by promoting the destination to a larger signed
262 // conversion.
263 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
264 // FIXME: This doesn't generate invalid exception when it should. PR44019.
265 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
266 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
267 // FIXME: This doesn't generate invalid exception when it should. PR44019.
268 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
269 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
270 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
271 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
272 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
273 }
274
275 // Handle address space casts between mixed sized pointers.
276 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
277 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
278
279 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
280 if (!X86ScalarSSEf64) {
281 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
282 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
283 if (Subtarget.is64Bit()) {
284 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
285 // Without SSE, i64->f64 goes through memory.
286 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
287 }
288 } else if (!Subtarget.is64Bit())
289 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
290
291 // Scalar integer divide and remainder are lowered to use operations that
292 // produce two results, to match the available instructions. This exposes
293 // the two-result form to trivial CSE, which is able to combine x/y and x%y
294 // into a single instruction.
295 //
296 // Scalar integer multiply-high is also lowered to use two-result
297 // operations, to match the available instructions. However, plain multiply
298 // (low) operations are left as Legal, as there are single-result
299 // instructions for this in x86. Using the two-result multiply instructions
300 // when both high and low results are needed must be arranged by dagcombine.
301 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
302 setOperationAction(ISD::MULHS, VT, Expand);
303 setOperationAction(ISD::MULHU, VT, Expand);
304 setOperationAction(ISD::SDIV, VT, Expand);
305 setOperationAction(ISD::UDIV, VT, Expand);
306 setOperationAction(ISD::SREM, VT, Expand);
307 setOperationAction(ISD::UREM, VT, Expand);
308 }
309
310 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
311 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
312 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
313 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
314 setOperationAction(ISD::BR_CC, VT, Expand);
315 setOperationAction(ISD::SELECT_CC, VT, Expand);
316 }
317 if (Subtarget.is64Bit())
318 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
319 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
320 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
321 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
322
323 setOperationAction(ISD::FREM , MVT::f32 , Expand);
324 setOperationAction(ISD::FREM , MVT::f64 , Expand);
325 setOperationAction(ISD::FREM , MVT::f80 , Expand);
326 setOperationAction(ISD::FREM , MVT::f128 , Expand);
327 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
328
329 // Promote the i8 variants and force them on up to i32 which has a shorter
330 // encoding.
331 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
332 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
333 if (!Subtarget.hasBMI()) {
334 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
335 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
336 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
337 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
338 if (Subtarget.is64Bit()) {
339 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
340 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
341 }
342 }
343
344 if (Subtarget.hasLZCNT()) {
345 // When promoting the i8 variants, force them to i32 for a shorter
346 // encoding.
347 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
348 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
349 } else {
350 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
351 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
352 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
353 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
354 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
355 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
356 if (Subtarget.is64Bit()) {
357 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
358 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
359 }
360 }
361
362 // Special handling for half-precision floating point conversions.
363 // If we don't have F16C support, then lower half float conversions
364 // into library calls.
365 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
366 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
367 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
368 }
369
370 // There's never any support for operations beyond MVT::f32.
371 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
372 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
373 setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
374 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
375 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
376 setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
377
378 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
379 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
380 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
381 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
382 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
383 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
384 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
385 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
386
387 if (Subtarget.hasPOPCNT()) {
388 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
389 } else {
390 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
391 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
392 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
393 if (Subtarget.is64Bit())
394 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
395 else
396 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
397 }
398
399 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
400
401 if (!Subtarget.hasMOVBE())
402 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
403
404 // X86 wants to expand cmov itself.
405 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
406 setOperationAction(ISD::SELECT, VT, Custom);
407 setOperationAction(ISD::SETCC, VT, Custom);
408 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
409 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
410 }
411 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
412 if (VT == MVT::i64 && !Subtarget.is64Bit())
413 continue;
414 setOperationAction(ISD::SELECT, VT, Custom);
415 setOperationAction(ISD::SETCC, VT, Custom);
416 }
417
418 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
419 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
420 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
421
422 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
423 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
424 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
425 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
426 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
427 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
428 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
429 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
430
431 // Darwin ABI issue.
432 for (auto VT : { MVT::i32, MVT::i64 }) {
433 if (VT == MVT::i64 && !Subtarget.is64Bit())
434 continue;
435 setOperationAction(ISD::ConstantPool , VT, Custom);
436 setOperationAction(ISD::JumpTable , VT, Custom);
437 setOperationAction(ISD::GlobalAddress , VT, Custom);
438 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
439 setOperationAction(ISD::ExternalSymbol , VT, Custom);
440 setOperationAction(ISD::BlockAddress , VT, Custom);
441 }
442
443 // 64-bit shl, sra, srl (iff 32-bit x86)
444 for (auto VT : { MVT::i32, MVT::i64 }) {
445 if (VT == MVT::i64 && !Subtarget.is64Bit())
446 continue;
447 setOperationAction(ISD::SHL_PARTS, VT, Custom);
448 setOperationAction(ISD::SRA_PARTS, VT, Custom);
449 setOperationAction(ISD::SRL_PARTS, VT, Custom);
450 }
451
452 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
453 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
454
455 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
456
457 // Expand certain atomics
458 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
459 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
460 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
461 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
462 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
463 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
464 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
465 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
466 }
467
468 if (!Subtarget.is64Bit())
469 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
470
471 if (Subtarget.hasCmpxchg16b()) {
472 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
473 }
474
475 // FIXME - use subtarget debug flags
476 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
477 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
478 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
479 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
480 }
481
482 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
483 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
484
485 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
486 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
487
488 setOperationAction(ISD::TRAP, MVT::Other, Legal);
489 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
490
491 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
492 setOperationAction(ISD::VASTART , MVT::Other, Custom);
493 setOperationAction(ISD::VAEND , MVT::Other, Expand);
494 bool Is64Bit = Subtarget.is64Bit();
495 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
496 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
497
498 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
499 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
500
501 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
502
503 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
504 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
505 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
506
507 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
508 // f32 and f64 use SSE.
509 // Set up the FP register classes.
510 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
511 : &X86::FR32RegClass);
512 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
513 : &X86::FR64RegClass);
514
515 // Disable f32->f64 extload as we can only generate this in one instruction
516 // under optsize. So it's easier to pattern match (fpext (load)) for that
517 // case instead of needing to emit 2 instructions for extload in the
518 // non-optsize case.
519 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
520
521 for (auto VT : { MVT::f32, MVT::f64 }) {
522 // Use ANDPD to simulate FABS.
523 setOperationAction(ISD::FABS, VT, Custom);
524
525 // Use XORP to simulate FNEG.
526 setOperationAction(ISD::FNEG, VT, Custom);
527
528 // Use ANDPD and ORPD to simulate FCOPYSIGN.
529 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
530
531 // These might be better off as horizontal vector ops.
532 setOperationAction(ISD::FADD, VT, Custom);
533 setOperationAction(ISD::FSUB, VT, Custom);
534
535 // We don't support sin/cos/fmod
536 setOperationAction(ISD::FSIN , VT, Expand);
537 setOperationAction(ISD::FCOS , VT, Expand);
538 setOperationAction(ISD::FSINCOS, VT, Expand);
539 }
540
541 // Lower this to MOVMSK plus an AND.
542 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
543 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
544
545 } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
546 // Use SSE for f32, x87 for f64.
547 // Set up the FP register classes.
548 addRegisterClass(MVT::f32, &X86::FR32RegClass);
549 if (UseX87)
550 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
551
552 // Use ANDPS to simulate FABS.
553 setOperationAction(ISD::FABS , MVT::f32, Custom);
554
555 // Use XORP to simulate FNEG.
556 setOperationAction(ISD::FNEG , MVT::f32, Custom);
557
558 if (UseX87)
559 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
560
561 // Use ANDPS and ORPS to simulate FCOPYSIGN.
562 if (UseX87)
563 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
564 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
565
566 // We don't support sin/cos/fmod
567 setOperationAction(ISD::FSIN , MVT::f32, Expand);
568 setOperationAction(ISD::FCOS , MVT::f32, Expand);
569 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
570
571 if (UseX87) {
572 // Always expand sin/cos functions even though x87 has an instruction.
573 setOperationAction(ISD::FSIN, MVT::f64, Expand);
574 setOperationAction(ISD::FCOS, MVT::f64, Expand);
575 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
576 }
577 } else if (UseX87) {
578 // f32 and f64 in x87.
579 // Set up the FP register classes.
580 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
581 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
582
583 for (auto VT : { MVT::f32, MVT::f64 }) {
584 setOperationAction(ISD::UNDEF, VT, Expand);
585 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
586
587 // Always expand sin/cos functions even though x87 has an instruction.
588 setOperationAction(ISD::FSIN , VT, Expand);
589 setOperationAction(ISD::FCOS , VT, Expand);
590 setOperationAction(ISD::FSINCOS, VT, Expand);
591 }
592 }
593
594 // Expand FP32 immediates into loads from the stack, save special cases.
595 if (isTypeLegal(MVT::f32)) {
596 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
597 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
598 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
599 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
600 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
601 } else // SSE immediates.
602 addLegalFPImmediate(APFloat(+0.0f)); // xorps
603 }
604 // Expand FP64 immediates into loads from the stack, save special cases.
605 if (isTypeLegal(MVT::f64)) {
606 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
607 addLegalFPImmediate(APFloat(+0.0)); // FLD0
608 addLegalFPImmediate(APFloat(+1.0)); // FLD1
609 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
610 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
611 } else // SSE immediates.
612 addLegalFPImmediate(APFloat(+0.0)); // xorpd
613 }
614 // Handle constrained floating-point operations of scalar.
615 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
616 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
617 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
618 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
619 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
620 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
621 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
622 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
623 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
624 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
625 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
626 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
627 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
628
629 // We don't support FMA.
630 setOperationAction(ISD::FMA, MVT::f64, Expand);
631 setOperationAction(ISD::FMA, MVT::f32, Expand);
632
633 // f80 always uses X87.
634 if (UseX87) {
635 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
636 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
637 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
638 {
639 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
640 addLegalFPImmediate(TmpFlt); // FLD0
641 TmpFlt.changeSign();
642 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
643
644 bool ignored;
645 APFloat TmpFlt2(+1.0);
646 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
647 &ignored);
648 addLegalFPImmediate(TmpFlt2); // FLD1
649 TmpFlt2.changeSign();
650 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
651 }
652
653 // Always expand sin/cos functions even though x87 has an instruction.
654 setOperationAction(ISD::FSIN , MVT::f80, Expand);
655 setOperationAction(ISD::FCOS , MVT::f80, Expand);
656 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
657
658 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
659 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
660 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
661 setOperationAction(ISD::FRINT, MVT::f80, Expand);
662 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
663 setOperationAction(ISD::FMA, MVT::f80, Expand);
664 setOperationAction(ISD::LROUND, MVT::f80, Expand);
665 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
666 setOperationAction(ISD::LRINT, MVT::f80, Expand);
667 setOperationAction(ISD::LLRINT, MVT::f80, Expand);
668
669 // Handle constrained floating-point operations of scalar.
670 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
671 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
672 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
673 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
674 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
675 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
676 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
677 // as Custom.
678 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
679 }
680
681 // f128 uses xmm registers, but most operations require libcalls.
682 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
683 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
684 : &X86::VR128RegClass);
685
686 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
687
688 setOperationAction(ISD::FADD, MVT::f128, LibCall);
689 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
690 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
691 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
692 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
693 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
694 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
695 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
696 setOperationAction(ISD::FMA, MVT::f128, LibCall);
697 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
698
699 setOperationAction(ISD::FABS, MVT::f128, Custom);
700 setOperationAction(ISD::FNEG, MVT::f128, Custom);
701 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
702
703 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
704 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
705 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
706 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
707 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
708 // No STRICT_FSINCOS
709 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
710 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
711
712 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
713 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
714 // We need to custom handle any FP_ROUND with an f128 input, but
715 // LegalizeDAG uses the result type to know when to run a custom handler.
716 // So we have to list all legal floating point result types here.
717 if (isTypeLegal(MVT::f32)) {
718 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
719 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
720 }
721 if (isTypeLegal(MVT::f64)) {
722 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
723 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
724 }
725 if (isTypeLegal(MVT::f80)) {
726 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
727 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
728 }
729
730 setOperationAction(ISD::SETCC, MVT::f128, Custom);
731
732 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
733 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
734 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
735 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
736 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
737 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
738 }
739
740 // Always use a library call for pow.
741 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
742 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
743 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
744 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
745
746 setOperationAction(ISD::FLOG, MVT::f80, Expand);
747 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
748 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
749 setOperationAction(ISD::FEXP, MVT::f80, Expand);
750 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
751 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
752 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
753
754 // Some FP actions are always expanded for vector types.
755 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
756 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
757 setOperationAction(ISD::FSIN, VT, Expand);
758 setOperationAction(ISD::FSINCOS, VT, Expand);
759 setOperationAction(ISD::FCOS, VT, Expand);
760 setOperationAction(ISD::FREM, VT, Expand);
761 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
762 setOperationAction(ISD::FPOW, VT, Expand);
763 setOperationAction(ISD::FLOG, VT, Expand);
764 setOperationAction(ISD::FLOG2, VT, Expand);
765 setOperationAction(ISD::FLOG10, VT, Expand);
766 setOperationAction(ISD::FEXP, VT, Expand);
767 setOperationAction(ISD::FEXP2, VT, Expand);
768 }
769
770 // First set operation action for all vector types to either promote
771 // (for widening) or expand (for scalarization). Then we will selectively
772 // turn on ones that can be effectively codegen'd.
773 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
774 setOperationAction(ISD::SDIV, VT, Expand);
775 setOperationAction(ISD::UDIV, VT, Expand);
776 setOperationAction(ISD::SREM, VT, Expand);
777 setOperationAction(ISD::UREM, VT, Expand);
778 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
779 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
780 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
781 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
782 setOperationAction(ISD::FMA, VT, Expand);
783 setOperationAction(ISD::FFLOOR, VT, Expand);
784 setOperationAction(ISD::FCEIL, VT, Expand);
785 setOperationAction(ISD::FTRUNC, VT, Expand);
786 setOperationAction(ISD::FRINT, VT, Expand);
787 setOperationAction(ISD::FNEARBYINT, VT, Expand);
788 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
789 setOperationAction(ISD::MULHS, VT, Expand);
790 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
791 setOperationAction(ISD::MULHU, VT, Expand);
792 setOperationAction(ISD::SDIVREM, VT, Expand);
793 setOperationAction(ISD::UDIVREM, VT, Expand);
794 setOperationAction(ISD::CTPOP, VT, Expand);
795 setOperationAction(ISD::CTTZ, VT, Expand);
796 setOperationAction(ISD::CTLZ, VT, Expand);
797 setOperationAction(ISD::ROTL, VT, Expand);
798 setOperationAction(ISD::ROTR, VT, Expand);
799 setOperationAction(ISD::BSWAP, VT, Expand);
800 setOperationAction(ISD::SETCC, VT, Expand);
801 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
802 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
803 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
804 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
805 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
806 setOperationAction(ISD::TRUNCATE, VT, Expand);
807 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
808 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
809 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
810 setOperationAction(ISD::SELECT_CC, VT, Expand);
811 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
812 setTruncStoreAction(InnerVT, VT, Expand);
813
814 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
815 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
816
817 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
818 // types, we have to deal with them whether we ask for Expansion or not.
819 // Setting Expand causes its own optimisation problems though, so leave
820 // them legal.
821 if (VT.getVectorElementType() == MVT::i1)
822 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
823
824 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
825 // split/scalarized right now.
826 if (VT.getVectorElementType() == MVT::f16)
827 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
828 }
829 }
830
831 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
832 // with -msoft-float, disable use of MMX as well.
833 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
834 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
835 // No operations on x86mmx supported, everything uses intrinsics.
836 }
837
838 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
839 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
840 : &X86::VR128RegClass);
841
842 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
843 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
844 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
845 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
846 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
847 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
848 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
849 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
850
851 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
852 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
853
854 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
855 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
856 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
857 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
858 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
859 }
860
861 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
862 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
863 : &X86::VR128RegClass);
864
865 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
866 // registers cannot be used even for integer operations.
867 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
868 : &X86::VR128RegClass);
869 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
870 : &X86::VR128RegClass);
871 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
872 : &X86::VR128RegClass);
873 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
874 : &X86::VR128RegClass);
875
876 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
877 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
878 setOperationAction(ISD::SDIV, VT, Custom);
879 setOperationAction(ISD::SREM, VT, Custom);
880 setOperationAction(ISD::UDIV, VT, Custom);
881 setOperationAction(ISD::UREM, VT, Custom);
882 }
883
884 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
885 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
886 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
887
888 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
889 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
890 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
891 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
892 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
893 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
894 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
895 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
896 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
897 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
898 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
899 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
900 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
901
902 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
903 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
904 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
905 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
906 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
907 }
908
909 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
910 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
911 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
912 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
913 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
914 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
915 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
916 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
917 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
918 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
919 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
920 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
921
922 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
923 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
924 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
925
926 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
927 setOperationAction(ISD::SETCC, VT, Custom);
928 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
929 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
930 setOperationAction(ISD::CTPOP, VT, Custom);
931 setOperationAction(ISD::ABS, VT, Custom);
932
933 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
934 // setcc all the way to isel and prefer SETGT in some isel patterns.
935 setCondCodeAction(ISD::SETLT, VT, Custom);
936 setCondCodeAction(ISD::SETLE, VT, Custom);
937 }
938
939 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
940 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
941 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
942 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
943 setOperationAction(ISD::VSELECT, VT, Custom);
944 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
945 }
946
947 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
948 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
949 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
950 setOperationAction(ISD::VSELECT, VT, Custom);
951
952 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
953 continue;
954
955 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
956 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
957 }
958
959 // Custom lower v2f64 and 128-bit integer vector selects.
960 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
961 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
962 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
963 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
964 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
965
966 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
967 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
968 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
969 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
970
971 // Custom legalize these to avoid over promotion or custom promotion.
972 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
973 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
974 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
975 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
976 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
977 }
978
979 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
980 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
981 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
982 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
983
984 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
985 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
986
987 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
988 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
989
990 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
991 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
992 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
993 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
994 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
995
996 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
997 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
998 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
999 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1000
1001 // We want to legalize this to an f64 load rather than an i64 load on
1002 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1003 // store.
1004 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1005 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1006 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1007 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1008 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1009 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1010
1011 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1012 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1013 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1014 if (!Subtarget.hasAVX512())
1015 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1016
1017 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1018 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1019 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1020
1021 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1022
1023 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1024 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1025 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1026 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1027 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1028 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1029
1030 // In the customized shift lowering, the legal v4i32/v2i64 cases
1031 // in AVX2 will be recognized.
1032 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1033 setOperationAction(ISD::SRL, VT, Custom);
1034 setOperationAction(ISD::SHL, VT, Custom);
1035 setOperationAction(ISD::SRA, VT, Custom);
1036 }
1037
1038 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1039 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1040
1041 // With AVX512, expanding (and promoting the shifts) is better.
1042 if (!Subtarget.hasAVX512())
1043 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1044
1045 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1046 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1047 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1048 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1049 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1050 }
1051
1052 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1053 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1054 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1055 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1056 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1057 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1058 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1059 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1060 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1061
1062 // These might be better off as horizontal vector ops.
1063 setOperationAction(ISD::ADD, MVT::i16, Custom);
1064 setOperationAction(ISD::ADD, MVT::i32, Custom);
1065 setOperationAction(ISD::SUB, MVT::i16, Custom);
1066 setOperationAction(ISD::SUB, MVT::i32, Custom);
1067 }
1068
1069 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1070 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1071 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1072 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1073 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1074 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1075 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1076 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1077 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1078 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1079 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1080 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1081 }
1082
1083 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1084 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1085 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1086 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1087 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1088 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1089 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1090 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1091
1092 // FIXME: Do we need to handle scalar-to-vector here?
1093 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1094
1095 // We directly match byte blends in the backend as they match the VSELECT
1096 // condition form.
1097 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1098
1099 // SSE41 brings specific instructions for doing vector sign extend even in
1100 // cases where we don't have SRA.
1101 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1102 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1103 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1104 }
1105
1106 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1107 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1108 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1109 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1110 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1111 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1112 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1113 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1114 }
1115
1116 // i8 vectors are custom because the source register and source
1117 // memory operand types are not the same width.
1118 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1119
1120 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1121 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1122 // do the pre and post work in the vector domain.
1123 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1124 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1125 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1126 // so that DAG combine doesn't try to turn it into uint_to_fp.
1127 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1128 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1129 }
1130 }
1131
1132 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1133 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1134 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1135 setOperationAction(ISD::ROTL, VT, Custom);
1136
1137 // XOP can efficiently perform BITREVERSE with VPPERM.
1138 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1139 setOperationAction(ISD::BITREVERSE, VT, Custom);
1140
1141 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1142 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1143 setOperationAction(ISD::BITREVERSE, VT, Custom);
1144 }
1145
1146 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1147 bool HasInt256 = Subtarget.hasInt256();
1148
1149 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1150 : &X86::VR256RegClass);
1151 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1152 : &X86::VR256RegClass);
1153 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1154 : &X86::VR256RegClass);
1155 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1156 : &X86::VR256RegClass);
1157 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1158 : &X86::VR256RegClass);
1159 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1160 : &X86::VR256RegClass);
1161
1162 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1163 setOperationAction(ISD::FFLOOR, VT, Legal);
1164 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1165 setOperationAction(ISD::FCEIL, VT, Legal);
1166 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1167 setOperationAction(ISD::FTRUNC, VT, Legal);
1168 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1169 setOperationAction(ISD::FRINT, VT, Legal);
1170 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1171 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1172 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1173 setOperationAction(ISD::FNEG, VT, Custom);
1174 setOperationAction(ISD::FABS, VT, Custom);
1175 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1176 }
1177
1178 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1179 // even though v8i16 is a legal type.
1180 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1181 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1182 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1183 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1184 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1185 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
1186
1187 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1188 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1189
1190 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1191 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1192 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1193 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1194 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1195 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1196 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1197 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1198 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1199 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1200 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1201 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1202
1203 if (!Subtarget.hasAVX512())
1204 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1205
1206 // In the customized shift lowering, the legal v8i32/v4i64 cases
1207 // in AVX2 will be recognized.
1208 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1209 setOperationAction(ISD::SRL, VT, Custom);
1210 setOperationAction(ISD::SHL, VT, Custom);
1211 setOperationAction(ISD::SRA, VT, Custom);
1212 }
1213
1214 // These types need custom splitting if their input is a 128-bit vector.
1215 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1216 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1217 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1218 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1219
1220 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1221 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1222
1223 // With BWI, expanding (and promoting the shifts) is better.
1224 if (!Subtarget.hasBWI())
1225 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1226
1227 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1228 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1229 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1230 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1231 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1232 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1233
1234 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1235 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1236 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1237 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1238 }
1239
1240 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1241 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1242 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1243 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1244
1245 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1246 setOperationAction(ISD::SETCC, VT, Custom);
1247 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1248 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1249 setOperationAction(ISD::CTPOP, VT, Custom);
1250 setOperationAction(ISD::CTLZ, VT, Custom);
1251
1252 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1253 // setcc all the way to isel and prefer SETGT in some isel patterns.
1254 setCondCodeAction(ISD::SETLT, VT, Custom);
1255 setCondCodeAction(ISD::SETLE, VT, Custom);
1256 }
1257
1258 if (Subtarget.hasAnyFMA()) {
1259 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1260 MVT::v2f64, MVT::v4f64 }) {
1261 setOperationAction(ISD::FMA, VT, Legal);
1262 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1263 }
1264 }
1265
1266 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1267 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1268 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1269 }
1270
1271 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1272 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1273 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1274 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1275
1276 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1277 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1278 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1279 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1280 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1281 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1282
1283 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1284 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1285 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1286 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1287 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1288
1289 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1290 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1291 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1292 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1293 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1294 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1295 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1296 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1297
1298 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1299 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1300 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1301 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1302 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1303 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1304 }
1305
1306 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1307 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1308 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1309 }
1310
1311 if (HasInt256) {
1312 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1313 // when we have a 256bit-wide blend with immediate.
1314 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1315 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1316
1317 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1318 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1319 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1320 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1321 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1322 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1323 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1324 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1325 }
1326 }
1327
1328 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1329 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1330 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1331 setOperationAction(ISD::MSTORE, VT, Legal);
1332 }
1333
1334 // Extract subvector is special because the value type
1335 // (result) is 128-bit but the source is 256-bit wide.
1336 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1337 MVT::v4f32, MVT::v2f64 }) {
1338 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1339 }
1340
1341 // Custom lower several nodes for 256-bit types.
1342 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1343 MVT::v8f32, MVT::v4f64 }) {
1344 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1345 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1346 setOperationAction(ISD::VSELECT, VT, Custom);
1347 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1348 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1349 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1350 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1351 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1352 setOperationAction(ISD::STORE, VT, Custom);
1353 }
1354
1355 if (HasInt256) {
1356 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1357
1358 // Custom legalize 2x32 to get a little better code.
1359 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1360 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1361
1362 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1363 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1364 setOperationAction(ISD::MGATHER, VT, Custom);
1365 }
1366 }
1367
1368 // This block controls legalization of the mask vector sizes that are
1369 // available with AVX512. 512-bit vectors are in a separate block controlled
1370 // by useAVX512Regs.
1371 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1372 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1373 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1374 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1375 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1376 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1377
1378 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1379 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1380 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1381
1382 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1383 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1384 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1385 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1386 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1387 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1388 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1389 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1390 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1391 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1392 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1393 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1394
1395 // There is no byte sized k-register load or store without AVX512DQ.
1396 if (!Subtarget.hasDQI()) {
1397 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1398 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1399 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1400 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1401
1402 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1403 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1404 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1405 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1406 }
1407
1408 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1409 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1410 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1411 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1412 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1413 }
1414
1415 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1416 setOperationAction(ISD::ADD, VT, Custom);
1417 setOperationAction(ISD::SUB, VT, Custom);
1418 setOperationAction(ISD::MUL, VT, Custom);
1419 setOperationAction(ISD::SETCC, VT, Custom);
1420 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1421 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1422 setOperationAction(ISD::SELECT, VT, Custom);
1423 setOperationAction(ISD::TRUNCATE, VT, Custom);
1424 setOperationAction(ISD::UADDSAT, VT, Custom);
1425 setOperationAction(ISD::SADDSAT, VT, Custom);
1426 setOperationAction(ISD::USUBSAT, VT, Custom);
1427 setOperationAction(ISD::SSUBSAT, VT, Custom);
1428
1429 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1430 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1431 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1432 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1433 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1434 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1435 setOperationAction(ISD::VSELECT, VT, Expand);
1436 }
1437
1438 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1439 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1440 }
1441
1442 // This block controls legalization for 512-bit operations with 32/64 bit
1443 // elements. 512-bits can be disabled based on prefer-vector-width and
1444 // required-vector-width function attributes.
1445 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1446 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1447 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1448 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1449 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1450
1451 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1452 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1453 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1454 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1455 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1456 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1457 }
1458
1459 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1460 setOperationAction(ISD::FNEG, VT, Custom);
1461 setOperationAction(ISD::FABS, VT, Custom);
1462 setOperationAction(ISD::FMA, VT, Legal);
1463 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1464 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1465 }
1466
1467 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1468 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1469 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1470 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1471 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1472 }
1473 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1474 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1475 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1476 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1477 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1478 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1479 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1480 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1481
1482 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1483 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1484 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1485 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1486 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1487 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1488 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1489 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1490 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1491 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1492 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1493 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1494
1495 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1496 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1497 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1498 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1499 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1500
1501 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1502 // to 512-bit rather than use the AVX2 instructions so that we can use
1503 // k-masks.
1504 if (!Subtarget.hasVLX()) {
1505 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1506 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1507 setOperationAction(ISD::MLOAD, VT, Custom);
1508 setOperationAction(ISD::MSTORE, VT, Custom);
1509 }
1510 }
1511
1512 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1513 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1514 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1515 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1516 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1517 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1518 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1519 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1520
1521 // Need to custom widen this if we don't have AVX512BW.
1522 setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
1523 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
1524 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
1525
1526 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1527 setOperationAction(ISD::FFLOOR, VT, Legal);
1528 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1529 setOperationAction(ISD::FCEIL, VT, Legal);
1530 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1531 setOperationAction(ISD::FTRUNC, VT, Legal);
1532 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1533 setOperationAction(ISD::FRINT, VT, Legal);
1534 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1535 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1536 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1537
1538 setOperationAction(ISD::SELECT, VT, Custom);
1539 }
1540
1541 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1542 for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
1543 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1544 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1545 }
1546
1547 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1548 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1549 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1550 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1551
1552 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1553 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1554
1555 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1556 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1557
1558 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1559 setOperationAction(ISD::SMAX, VT, Legal);
1560 setOperationAction(ISD::UMAX, VT, Legal);
1561 setOperationAction(ISD::SMIN, VT, Legal);
1562 setOperationAction(ISD::UMIN, VT, Legal);
1563 setOperationAction(ISD::ABS, VT, Legal);
1564 setOperationAction(ISD::SRL, VT, Custom);
1565 setOperationAction(ISD::SHL, VT, Custom);
1566 setOperationAction(ISD::SRA, VT, Custom);
1567 setOperationAction(ISD::CTPOP, VT, Custom);
1568 setOperationAction(ISD::ROTL, VT, Custom);
1569 setOperationAction(ISD::ROTR, VT, Custom);
1570 setOperationAction(ISD::SETCC, VT, Custom);
1571 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1572 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1573 setOperationAction(ISD::SELECT, VT, Custom);
1574
1575 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1576 // setcc all the way to isel and prefer SETGT in some isel patterns.
1577 setCondCodeAction(ISD::SETLT, VT, Custom);
1578 setCondCodeAction(ISD::SETLE, VT, Custom);
1579 }
1580
1581 if (Subtarget.hasDQI()) {
1582 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1583 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1584 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1585 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1586 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1587 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1588 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1589 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1590
1591 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1592 }
1593
1594 if (Subtarget.hasCDI()) {
1595 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1596 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1597 setOperationAction(ISD::CTLZ, VT, Legal);
1598 }
1599 } // Subtarget.hasCDI()
1600
1601 if (Subtarget.hasVPOPCNTDQ()) {
1602 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1603 setOperationAction(ISD::CTPOP, VT, Legal);
1604 }
1605
1606 // Extract subvector is special because the value type
1607 // (result) is 256-bit but the source is 512-bit wide.
1608 // 128-bit was made Legal under AVX1.
1609 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1610 MVT::v8f32, MVT::v4f64 })
1611 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1612
1613 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1614 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1615 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1616 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1617 setOperationAction(ISD::VSELECT, VT, Custom);
1618 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1619 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1620 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1621 setOperationAction(ISD::MLOAD, VT, Legal);
1622 setOperationAction(ISD::MSTORE, VT, Legal);
1623 setOperationAction(ISD::MGATHER, VT, Custom);
1624 setOperationAction(ISD::MSCATTER, VT, Custom);
1625 }
1626 if (!Subtarget.hasBWI()) {
1627 // Need to custom split v32i16/v64i8 bitcasts.
1628 setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
1629 setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
1630
1631 // Better to split these into two 256-bit ops.
1632 setOperationAction(ISD::BITREVERSE, MVT::v8i64, Custom);
1633 setOperationAction(ISD::BITREVERSE, MVT::v16i32, Custom);
1634 }
1635
1636 if (Subtarget.hasVBMI2()) {
1637 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1638 setOperationAction(ISD::FSHL, VT, Custom);
1639 setOperationAction(ISD::FSHR, VT, Custom);
1640 }
1641 }
1642 }// has AVX-512
1643
1644 // This block controls legalization for operations that don't have
1645 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1646 // narrower widths.
1647 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1648 // These operations are handled on non-VLX by artificially widening in
1649 // isel patterns.
1650
1651 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1652 Subtarget.hasVLX() ? Legal : Custom);
1653 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1654 Subtarget.hasVLX() ? Legal : Custom);
1655 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1656 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1657 Subtarget.hasVLX() ? Legal : Custom);
1658 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1659 Subtarget.hasVLX() ? Legal : Custom);
1660 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1661 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1662 Subtarget.hasVLX() ? Legal : Custom);
1663 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1664 Subtarget.hasVLX() ? Legal : Custom);
1665 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1666 Subtarget.hasVLX() ? Legal : Custom);
1667 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1668 Subtarget.hasVLX() ? Legal : Custom);
1669
1670 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1671 setOperationAction(ISD::SMAX, VT, Legal);
1672 setOperationAction(ISD::UMAX, VT, Legal);
1673 setOperationAction(ISD::SMIN, VT, Legal);
1674 setOperationAction(ISD::UMIN, VT, Legal);
1675 setOperationAction(ISD::ABS, VT, Legal);
1676 }
1677
1678 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1679 setOperationAction(ISD::ROTL, VT, Custom);
1680 setOperationAction(ISD::ROTR, VT, Custom);
1681 }
1682
1683 // Custom legalize 2x32 to get a little better code.
1684 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1685 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1686
1687 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1688 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1689 setOperationAction(ISD::MSCATTER, VT, Custom);
1690
1691 if (Subtarget.hasDQI()) {
1692 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1693 setOperationAction(ISD::SINT_TO_FP, VT,
1694 Subtarget.hasVLX() ? Legal : Custom);
1695 setOperationAction(ISD::UINT_TO_FP, VT,
1696 Subtarget.hasVLX() ? Legal : Custom);
1697 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1698 Subtarget.hasVLX() ? Legal : Custom);
1699 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1700 Subtarget.hasVLX() ? Legal : Custom);
1701 setOperationAction(ISD::FP_TO_SINT, VT,
1702 Subtarget.hasVLX() ? Legal : Custom);
1703 setOperationAction(ISD::FP_TO_UINT, VT,
1704 Subtarget.hasVLX() ? Legal : Custom);
1705 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1706 Subtarget.hasVLX() ? Legal : Custom);
1707 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1708 Subtarget.hasVLX() ? Legal : Custom);
1709 setOperationAction(ISD::MUL, VT, Legal);
1710 }
1711 }
1712
1713 if (Subtarget.hasCDI()) {
1714 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1715 setOperationAction(ISD::CTLZ, VT, Legal);
1716 }
1717 } // Subtarget.hasCDI()
1718
1719 if (Subtarget.hasVPOPCNTDQ()) {
1720 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1721 setOperationAction(ISD::CTPOP, VT, Legal);
1722 }
1723 }
1724
1725 // This block controls legalization of v32i1/v64i1 which are available with
1726 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1727 // useBWIRegs.
1728 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1729 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1730 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1731
1732 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1733 setOperationAction(ISD::ADD, VT, Custom);
1734 setOperationAction(ISD::SUB, VT, Custom);
1735 setOperationAction(ISD::MUL, VT, Custom);
1736 setOperationAction(ISD::VSELECT, VT, Expand);
1737 setOperationAction(ISD::UADDSAT, VT, Custom);
1738 setOperationAction(ISD::SADDSAT, VT, Custom);
1739 setOperationAction(ISD::USUBSAT, VT, Custom);
1740 setOperationAction(ISD::SSUBSAT, VT, Custom);
1741
1742 setOperationAction(ISD::TRUNCATE, VT, Custom);
1743 setOperationAction(ISD::SETCC, VT, Custom);
1744 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1745 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1746 setOperationAction(ISD::SELECT, VT, Custom);
1747 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1748 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1749 }
1750
1751 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1752 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1753 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1754 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1755 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1756 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1757
1758 // Extends from v32i1 masks to 256-bit vectors.
1759 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1760 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1761 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1762 }
1763
1764 // This block controls legalization for v32i16 and v64i8. 512-bits can be
1765 // disabled based on prefer-vector-width and required-vector-width function
1766 // attributes.
1767 if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1768 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1769 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1770
1771 // Extends from v64i1 masks to 512-bit vectors.
1772 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1773 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1774 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1775
1776 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1777 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1778 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1779 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1780 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1781 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1782 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1783 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1784 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1785 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1786 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1787 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1788 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1789 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1790 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1791 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1792 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1793 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1794 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1795 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1796 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1797 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1798 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1799
1800 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1801 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1802
1803 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1804
1805 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1806 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1807 setOperationAction(ISD::VSELECT, VT, Custom);
1808 setOperationAction(ISD::ABS, VT, Legal);
1809 setOperationAction(ISD::SRL, VT, Custom);
1810 setOperationAction(ISD::SHL, VT, Custom);
1811 setOperationAction(ISD::SRA, VT, Custom);
1812 setOperationAction(ISD::MLOAD, VT, Legal);
1813 setOperationAction(ISD::MSTORE, VT, Legal);
1814 setOperationAction(ISD::CTPOP, VT, Custom);
1815 setOperationAction(ISD::CTLZ, VT, Custom);
1816 setOperationAction(ISD::SMAX, VT, Legal);
1817 setOperationAction(ISD::UMAX, VT, Legal);
1818 setOperationAction(ISD::SMIN, VT, Legal);
1819 setOperationAction(ISD::UMIN, VT, Legal);
1820 setOperationAction(ISD::SETCC, VT, Custom);
1821 setOperationAction(ISD::UADDSAT, VT, Legal);
1822 setOperationAction(ISD::SADDSAT, VT, Legal);
1823 setOperationAction(ISD::USUBSAT, VT, Legal);
1824 setOperationAction(ISD::SSUBSAT, VT, Legal);
1825 setOperationAction(ISD::SELECT, VT, Custom);
1826
1827 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1828 // setcc all the way to isel and prefer SETGT in some isel patterns.
1829 setCondCodeAction(ISD::SETLT, VT, Custom);
1830 setCondCodeAction(ISD::SETLE, VT, Custom);
1831 }
1832
1833 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1834 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1835 }
1836
1837 if (Subtarget.hasBITALG()) {
1838 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1839 setOperationAction(ISD::CTPOP, VT, Legal);
1840 }
1841
1842 if (Subtarget.hasVBMI2()) {
1843 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1844 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1845 }
1846 }
1847
1848 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1849 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1850 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1851 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1852 }
1853
1854 // These operations are handled on non-VLX by artificially widening in
1855 // isel patterns.
1856 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1857
1858 if (Subtarget.hasBITALG()) {
1859 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1860 setOperationAction(ISD::CTPOP, VT, Legal);
1861 }
1862 }
1863
1864 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1865 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1866 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1867 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1868 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1869 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1870
1871 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1872 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1873 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1874 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1875 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1876
1877 if (Subtarget.hasDQI()) {
1878 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1879 // v2f32 UINT_TO_FP is already custom under SSE2.
1880 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1882, __PRETTY_FUNCTION__))
1881 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1882, __PRETTY_FUNCTION__))
1882 "Unexpected operation action!")((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom
(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 1882, __PRETTY_FUNCTION__))
;
1883 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1884 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1885 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1886 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1887 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1888 }
1889
1890 if (Subtarget.hasBWI()) {
1891 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1892 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1893 }
1894
1895 if (Subtarget.hasVBMI2()) {
1896 // TODO: Make these legal even without VLX?
1897 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1898 MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1899 setOperationAction(ISD::FSHL, VT, Custom);
1900 setOperationAction(ISD::FSHR, VT, Custom);
1901 }
1902 }
1903
1904 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
1905 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
1906 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1907 }
1908
1909 // We want to custom lower some of our intrinsics.
1910 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1911 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1912 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1913 if (!Subtarget.is64Bit()) {
1914 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1915 }
1916
1917 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1918 // handle type legalization for these operations here.
1919 //
1920 // FIXME: We really should do custom legalization for addition and
1921 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1922 // than generic legalization for 64-bit multiplication-with-overflow, though.
1923 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1924 if (VT == MVT::i64 && !Subtarget.is64Bit())
1925 continue;
1926 // Add/Sub/Mul with overflow operations are custom lowered.
1927 setOperationAction(ISD::SADDO, VT, Custom);
1928 setOperationAction(ISD::UADDO, VT, Custom);
1929 setOperationAction(ISD::SSUBO, VT, Custom);
1930 setOperationAction(ISD::USUBO, VT, Custom);
1931 setOperationAction(ISD::SMULO, VT, Custom);
1932 setOperationAction(ISD::UMULO, VT, Custom);
1933
1934 // Support carry in as value rather than glue.
1935 setOperationAction(ISD::ADDCARRY, VT, Custom);
1936 setOperationAction(ISD::SUBCARRY, VT, Custom);
1937 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1938 }
1939
1940 if (!Subtarget.is64Bit()) {
1941 // These libcalls are not available in 32-bit.
1942 setLibcallName(RTLIB::SHL_I128, nullptr);
1943 setLibcallName(RTLIB::SRL_I128, nullptr);
1944 setLibcallName(RTLIB::SRA_I128, nullptr);
1945 setLibcallName(RTLIB::MUL_I128, nullptr);
1946 }
1947
1948 // Combine sin / cos into _sincos_stret if it is available.
1949 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1950 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1951 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1952 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1953 }
1954
1955 if (Subtarget.isTargetWin64()) {
1956 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1957 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1958 setOperationAction(ISD::SREM, MVT::i128, Custom);
1959 setOperationAction(ISD::UREM, MVT::i128, Custom);
1960 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1961 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1962 }
1963
1964 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1965 // is. We should promote the value to 64-bits to solve this.
1966 // This is what the CRT headers do - `fmodf` is an inline header
1967 // function casting to f64 and calling `fmod`.
1968 if (Subtarget.is32Bit() &&
1969 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
1970 for (ISD::NodeType Op :
1971 {ISD::FCEIL, ISD::STRICT_FCEIL,
1972 ISD::FCOS, ISD::STRICT_FCOS,
1973 ISD::FEXP, ISD::STRICT_FEXP,
1974 ISD::FFLOOR, ISD::STRICT_FFLOOR,
1975 ISD::FREM, ISD::STRICT_FREM,
1976 ISD::FLOG, ISD::STRICT_FLOG,
1977 ISD::FLOG10, ISD::STRICT_FLOG10,
1978 ISD::FPOW, ISD::STRICT_FPOW,
1979 ISD::FSIN, ISD::STRICT_FSIN})
1980 if (isOperationExpand(Op, MVT::f32))
1981 setOperationAction(Op, MVT::f32, Promote);
1982
1983 // We have target-specific dag combine patterns for the following nodes:
1984 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1985 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1986 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1987 setTargetDAGCombine(ISD::CONCAT_VECTORS);
1988 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1989 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1990 setTargetDAGCombine(ISD::BITCAST);
1991 setTargetDAGCombine(ISD::VSELECT);
1992 setTargetDAGCombine(ISD::SELECT);
1993 setTargetDAGCombine(ISD::SHL);
1994 setTargetDAGCombine(ISD::SRA);
1995 setTargetDAGCombine(ISD::SRL);
1996 setTargetDAGCombine(ISD::OR);
1997 setTargetDAGCombine(ISD::AND);
1998 setTargetDAGCombine(ISD::ADD);
1999 setTargetDAGCombine(ISD::FADD);
2000 setTargetDAGCombine(ISD::FSUB);
2001 setTargetDAGCombine(ISD::FNEG);
2002 setTargetDAGCombine(ISD::FMA);
2003 setTargetDAGCombine(ISD::FMINNUM);
2004 setTargetDAGCombine(ISD::FMAXNUM);
2005 setTargetDAGCombine(ISD::SUB);
2006 setTargetDAGCombine(ISD::LOAD);
2007 setTargetDAGCombine(ISD::MLOAD);
2008 setTargetDAGCombine(ISD::STORE);
2009 setTargetDAGCombine(ISD::MSTORE);
2010 setTargetDAGCombine(ISD::TRUNCATE);
2011 setTargetDAGCombine(ISD::ZERO_EXTEND);
2012 setTargetDAGCombine(ISD::ANY_EXTEND);
2013 setTargetDAGCombine(ISD::SIGN_EXTEND);
2014 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
2015 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
2016 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
2017 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
2018 setTargetDAGCombine(ISD::SINT_TO_FP);
2019 setTargetDAGCombine(ISD::UINT_TO_FP);
2020 setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
2021 setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
2022 setTargetDAGCombine(ISD::SETCC);
2023 setTargetDAGCombine(ISD::MUL);
2024 setTargetDAGCombine(ISD::XOR);
2025 setTargetDAGCombine(ISD::MSCATTER);
2026 setTargetDAGCombine(ISD::MGATHER);
2027
2028 computeRegisterProperties(Subtarget.getRegisterInfo());
2029
2030 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2031 MaxStoresPerMemsetOptSize = 8;
2032 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2033 MaxStoresPerMemcpyOptSize = 4;
2034 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2035 MaxStoresPerMemmoveOptSize = 4;
2036
2037 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2038 // that needs to benchmarked and balanced with the potential use of vector
2039 // load/store types (PR33329, PR33914).
2040 MaxLoadsPerMemcmp = 2;
2041 MaxLoadsPerMemcmpOptSize = 2;
2042
2043 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
2044 setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
2045
2046 // An out-of-order CPU can speculatively execute past a predictable branch,
2047 // but a conditional move could be stalled by an expensive earlier operation.
2048 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2049 EnableExtLdPromotion = true;
2050 setPrefFunctionAlignment(Align(16));
2051
2052 verifyIntrinsicTables();
2053
2054 // Default to having -disable-strictnode-mutation on
2055 IsStrictFPEnabled = true;
2056}
2057
2058// This has so far only been implemented for 64-bit MachO.
2059bool X86TargetLowering::useLoadStackGuardNode() const {
2060 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2061}
2062
2063bool X86TargetLowering::useStackGuardXorFP() const {
2064 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2065 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2066}
2067
2068SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2069 const SDLoc &DL) const {
2070 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2071 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2072 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2073 return SDValue(Node, 0);
2074}
2075
2076TargetLoweringBase::LegalizeTypeAction
2077X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2078 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
2079 return TypeSplitVector;
2080
2081 if (VT.getVectorNumElements() != 1 &&
2082 VT.getVectorElementType() != MVT::i1)
2083 return TypeWidenVector;
2084
2085 return TargetLoweringBase::getPreferredVectorAction(VT);
2086}
2087
2088MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2089 CallingConv::ID CC,
2090 EVT VT) const {
2091 // v32i1 vectors should be promoted to v32i8 to match avx2.
2092 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
2093 return MVT::v32i8;
2094 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2095 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2096 Subtarget.hasAVX512() &&
2097 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2098 (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
2099 (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
2100 return MVT::i8;
2101 // Split v64i1 vectors if we don't have v64i8 available.
2102 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2103 CC != CallingConv::X86_RegCall)
2104 return MVT::v32i1;
2105 // FIXME: Should we just make these types legal and custom split operations?
2106 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
2107 Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
2108 return MVT::v16i32;
2109 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2110}
2111
2112unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2113 CallingConv::ID CC,
2114 EVT VT) const {
2115 // v32i1 vectors should be promoted to v32i8 to match avx2.
2116 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
2117 return 1;
2118 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2119 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2120 Subtarget.hasAVX512() &&
2121 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2122 (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
2123 (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
2124 return VT.getVectorNumElements();
2125 // Split v64i1 vectors if we don't have v64i8 available.
2126 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2127 CC != CallingConv::X86_RegCall)
2128 return 2;
2129 // FIXME: Should we just make these types legal and custom split operations?
2130 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
2131 Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
2132 return 1;
2133 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2134}
2135
2136unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2137 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2138 unsigned &NumIntermediates, MVT &RegisterVT) const {
2139 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2140 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2141 Subtarget.hasAVX512() &&
2142 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2143 (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
2144 (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
2145 RegisterVT = MVT::i8;
2146 IntermediateVT = MVT::i1;
2147 NumIntermediates = VT.getVectorNumElements();
2148 return NumIntermediates;
2149 }
2150
2151 // Split v64i1 vectors if we don't have v64i8 available.
2152 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2153 CC != CallingConv::X86_RegCall) {
2154 RegisterVT = MVT::v32i1;
2155 IntermediateVT = MVT::v32i1;
2156 NumIntermediates = 2;
2157 return 2;
2158 }
2159
2160 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2161 NumIntermediates, RegisterVT);
2162}
2163
2164EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2165 LLVMContext& Context,
2166 EVT VT) const {
2167 if (!VT.isVector())
2168 return MVT::i8;
2169
2170 if (Subtarget.hasAVX512()) {
2171 const unsigned NumElts = VT.getVectorNumElements();
2172
2173 // Figure out what this type will be legalized to.
2174 EVT LegalVT = VT;
2175 while (getTypeAction(Context, LegalVT) != TypeLegal)
2176 LegalVT = getTypeToTransformTo(Context, LegalVT);
2177
2178 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2179 if (LegalVT.getSimpleVT().is512BitVector())
2180 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2181
2182 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2183 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2184 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2185 // vXi16/vXi8.
2186 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2187 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2188 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2189 }
2190 }
2191
2192 return VT.changeVectorElementTypeToInteger();
2193}
2194
2195/// Helper for getByValTypeAlignment to determine
2196/// the desired ByVal argument alignment.
2197static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
2198 if (MaxAlign == 16)
2199 return;
2200 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2201 if (VTy->getBitWidth() == 128)
2202 MaxAlign = 16;
2203 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2204 unsigned EltAlign = 0;
2205 getMaxByValAlign(ATy->getElementType(), EltAlign);
2206 if (EltAlign > MaxAlign)
2207 MaxAlign = EltAlign;
2208 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2209 for (auto *EltTy : STy->elements()) {
2210 unsigned EltAlign = 0;
2211 getMaxByValAlign(EltTy, EltAlign);
2212 if (EltAlign > MaxAlign)
2213 MaxAlign = EltAlign;
2214 if (MaxAlign == 16)
2215 break;
2216 }
2217 }
2218}
2219
2220/// Return the desired alignment for ByVal aggregate
2221/// function arguments in the caller parameter area. For X86, aggregates
2222/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2223/// are at 4-byte boundaries.
2224unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2225 const DataLayout &DL) const {
2226 if (Subtarget.is64Bit()) {
2227 // Max of 8 and alignment of type.
2228 unsigned TyAlign = DL.getABITypeAlignment(Ty);
2229 if (TyAlign > 8)
2230 return TyAlign;
2231 return 8;
2232 }
2233
2234 unsigned Align = 4;
2235 if (Subtarget.hasSSE1())
2236 getMaxByValAlign(Ty, Align);
2237 return Align;
2238}
2239
2240/// Returns the target specific optimal type for load
2241/// and store operations as a result of memset, memcpy, and memmove
2242/// lowering. If DstAlign is zero that means it's safe to destination
2243/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
2244/// means there isn't a need to check it against alignment requirement,
2245/// probably because the source does not need to be loaded. If 'IsMemset' is
2246/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
2247/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
2248/// source is constant so it does not need to be loaded.
2249/// It returns EVT::Other if the type should be determined using generic
2250/// target-independent logic.
2251/// For vector ops we check that the overall size isn't larger than our
2252/// preferred vector width.
2253EVT X86TargetLowering::getOptimalMemOpType(
2254 uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
2255 bool ZeroMemset, bool MemcpyStrSrc,
2256 const AttributeList &FuncAttributes) const {
2257 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2258 if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
2259 ((DstAlign == 0 || DstAlign >= 16) &&
2260 (SrcAlign == 0 || SrcAlign >= 16)))) {
2261 // FIXME: Check if unaligned 64-byte accesses are slow.
2262 if (Size >= 64 && Subtarget.hasAVX512() &&
2263 (Subtarget.getPreferVectorWidth() >= 512)) {
2264 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2265 }
2266 // FIXME: Check if unaligned 32-byte accesses are slow.
2267 if (Size >= 32 && Subtarget.hasAVX() &&
2268 (Subtarget.getPreferVectorWidth() >= 256)) {
2269 // Although this isn't a well-supported type for AVX1, we'll let
2270 // legalization and shuffle lowering produce the optimal codegen. If we
2271 // choose an optimal type with a vector element larger than a byte,
2272 // getMemsetStores() may create an intermediate splat (using an integer
2273 // multiply) before we splat as a vector.
2274 return MVT::v32i8;
2275 }
2276 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2277 return MVT::v16i8;
2278 // TODO: Can SSE1 handle a byte vector?
2279 // If we have SSE1 registers we should be able to use them.
2280 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2281 (Subtarget.getPreferVectorWidth() >= 128))
2282 return MVT::v4f32;
2283 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
2284 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2285 // Do not use f64 to lower memcpy if source is string constant. It's
2286 // better to use i32 to avoid the loads.
2287 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2288 // The gymnastics of splatting a byte value into an XMM register and then
2289 // only using 8-byte stores (because this is a CPU with slow unaligned
2290 // 16-byte accesses) makes that a loser.
2291 return MVT::f64;
2292 }
2293 }
2294 // This is a compromise. If we reach here, unaligned accesses may be slow on
2295 // this target. However, creating smaller, aligned accesses could be even
2296 // slower and would certainly be a lot more code.
2297 if (Subtarget.is64Bit() && Size >= 8)
2298 return MVT::i64;
2299 return MVT::i32;
2300}
2301
2302bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2303 if (VT == MVT::f32)
2304 return X86ScalarSSEf32;
2305 else if (VT == MVT::f64)
2306 return X86ScalarSSEf64;
2307 return true;
2308}
2309
2310bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2311 EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags,
2312 bool *Fast) const {
2313 if (Fast) {
2314 switch (VT.getSizeInBits()) {
2315 default:
2316 // 8-byte and under are always assumed to be fast.
2317 *Fast = true;
2318 break;
2319 case 128:
2320 *Fast = !Subtarget.isUnalignedMem16Slow();
2321 break;
2322 case 256:
2323 *Fast = !Subtarget.isUnalignedMem32Slow();
2324 break;
2325 // TODO: What about AVX-512 (512-bit) accesses?
2326 }
2327 }
2328 // NonTemporal vector memory ops must be aligned.
2329 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2330 // NT loads can only be vector aligned, so if its less aligned than the
2331 // minimum vector size (which we can split the vector down to), we might as
2332 // well use a regular unaligned vector load.
2333 // We don't have any NT loads pre-SSE41.
2334 if (!!(Flags & MachineMemOperand::MOLoad))
2335 return (Align < 16 || !Subtarget.hasSSE41());
2336 return false;
2337 }
2338 // Misaligned accesses of any size are always allowed.
2339 return true;
2340}
2341
2342/// Return the entry encoding for a jump table in the
2343/// current function. The returned value is a member of the
2344/// MachineJumpTableInfo::JTEntryKind enum.
2345unsigned X86TargetLowering::getJumpTableEncoding() const {
2346 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2347 // symbol.
2348 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2349 return MachineJumpTableInfo::EK_Custom32;
2350
2351 // Otherwise, use the normal jump table encoding heuristics.
2352 return TargetLowering::getJumpTableEncoding();
2353}
2354
2355bool X86TargetLowering::useSoftFloat() const {
2356 return Subtarget.useSoftFloat();
2357}
2358
2359void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2360 ArgListTy &Args) const {
2361
2362 // Only relabel X86-32 for C / Stdcall CCs.
2363 if (Subtarget.is64Bit())
2364 return;
2365 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2366 return;
2367 unsigned ParamRegs = 0;
2368 if (auto *M = MF->getFunction().getParent())
2369 ParamRegs = M->getNumberRegisterParameters();
2370
2371 // Mark the first N int arguments as having reg
2372 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2373 Type *T = Args[Idx].Ty;
2374 if (T->isIntOrPtrTy())
2375 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2376 unsigned numRegs = 1;
2377 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2378 numRegs = 2;
2379 if (ParamRegs < numRegs)
2380 return;
2381 ParamRegs -= numRegs;
2382 Args[Idx].IsInReg = true;
2383 }
2384 }
2385}
2386
2387const MCExpr *
2388X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2389 const MachineBasicBlock *MBB,
2390 unsigned uid,MCContext &Ctx) const{
2391 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2391, __PRETTY_FUNCTION__))
;
2392 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2393 // entries.
2394 return MCSymbolRefExpr::create(MBB->getSymbol(),
2395 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2396}
2397
2398/// Returns relocation base for the given PIC jumptable.
2399SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2400 SelectionDAG &DAG) const {
2401 if (!Subtarget.is64Bit())
2402 // This doesn't have SDLoc associated with it, but is not really the
2403 // same as a Register.
2404 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2405 getPointerTy(DAG.getDataLayout()));
2406 return Table;
2407}
2408
2409/// This returns the relocation base for the given PIC jumptable,
2410/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2411const MCExpr *X86TargetLowering::
2412getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2413 MCContext &Ctx) const {
2414 // X86-64 uses RIP relative addressing based on the jump table label.
2415 if (Subtarget.isPICStyleRIPRel())
2416 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2417
2418 // Otherwise, the reference is relative to the PIC base.
2419 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2420}
2421
2422std::pair<const TargetRegisterClass *, uint8_t>
2423X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2424 MVT VT) const {
2425 const TargetRegisterClass *RRC = nullptr;
2426 uint8_t Cost = 1;
2427 switch (VT.SimpleTy) {
2428 default:
2429 return TargetLowering::findRepresentativeClass(TRI, VT);
2430 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2431 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2432 break;
2433 case MVT::x86mmx:
2434 RRC = &X86::VR64RegClass;
2435 break;
2436 case MVT::f32: case MVT::f64:
2437 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2438 case MVT::v4f32: case MVT::v2f64:
2439 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2440 case MVT::v8f32: case MVT::v4f64:
2441 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2442 case MVT::v16f32: case MVT::v8f64:
2443 RRC = &X86::VR128XRegClass;
2444 break;
2445 }
2446 return std::make_pair(RRC, Cost);
2447}
2448
2449unsigned X86TargetLowering::getAddressSpace() const {
2450 if (Subtarget.is64Bit())
2451 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2452 return 256;
2453}
2454
2455static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2456 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2457 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2458}
2459
2460static Constant* SegmentOffset(IRBuilder<> &IRB,
2461 unsigned Offset, unsigned AddressSpace) {
2462 return ConstantExpr::getIntToPtr(
2463 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2464 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2465}
2466
2467Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2468 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2469 // tcbhead_t; use it instead of the usual global variable (see
2470 // sysdeps/{i386,x86_64}/nptl/tls.h)
2471 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2472 if (Subtarget.isTargetFuchsia()) {
2473 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2474 return SegmentOffset(IRB, 0x10, getAddressSpace());
2475 } else {
2476 // %fs:0x28, unless we're using a Kernel code model, in which case
2477 // it's %gs:0x28. gs:0x14 on i386.
2478 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2479 return SegmentOffset(IRB, Offset, getAddressSpace());
2480 }
2481 }
2482
2483 return TargetLowering::getIRStackGuard(IRB);
2484}
2485
2486void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2487 // MSVC CRT provides functionalities for stack protection.
2488 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2489 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2490 // MSVC CRT has a global variable holding security cookie.
2491 M.getOrInsertGlobal("__security_cookie",
2492 Type::getInt8PtrTy(M.getContext()));
2493
2494 // MSVC CRT has a function to validate security cookie.
2495 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2496 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2497 Type::getInt8PtrTy(M.getContext()));
2498 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2499 F->setCallingConv(CallingConv::X86_FastCall);
2500 F->addAttribute(1, Attribute::AttrKind::InReg);
2501 }
2502 return;
2503 }
2504 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2505 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2506 return;
2507 TargetLowering::insertSSPDeclarations(M);
2508}
2509
2510Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2511 // MSVC CRT has a global variable holding security cookie.
2512 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2513 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2514 return M.getGlobalVariable("__security_cookie");
2515 }
2516 return TargetLowering::getSDagStackGuard(M);
2517}
2518
2519Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2520 // MSVC CRT has a function to validate security cookie.
2521 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2522 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2523 return M.getFunction("__security_check_cookie");
2524 }
2525 return TargetLowering::getSSPStackGuardCheck(M);
2526}
2527
2528Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2529 if (Subtarget.getTargetTriple().isOSContiki())
2530 return getDefaultSafeStackPointerLocation(IRB, false);
2531
2532 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2533 // definition of TLS_SLOT_SAFESTACK in
2534 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2535 if (Subtarget.isTargetAndroid()) {
2536 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2537 // %gs:0x24 on i386
2538 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2539 return SegmentOffset(IRB, Offset, getAddressSpace());
2540 }
2541
2542 // Fuchsia is similar.
2543 if (Subtarget.isTargetFuchsia()) {
2544 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2545 return SegmentOffset(IRB, 0x18, getAddressSpace());
2546 }
2547
2548 return TargetLowering::getSafeStackPointerLocation(IRB);
2549}
2550
2551bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2552 unsigned DestAS) const {
2553 assert(SrcAS != DestAS && "Expected different address spaces!")((SrcAS != DestAS && "Expected different address spaces!"
) ? static_cast<void> (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2553, __PRETTY_FUNCTION__))
;
2554
2555 const TargetMachine &TM = getTargetMachine();
2556 if (TM.getPointerSize(SrcAS) != TM.getPointerSize(DestAS))
2557 return false;
2558
2559 return SrcAS < 256 && DestAS < 256;
2560}
2561
2562//===----------------------------------------------------------------------===//
2563// Return Value Calling Convention Implementation
2564//===----------------------------------------------------------------------===//
2565
2566bool X86TargetLowering::CanLowerReturn(
2567 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2568 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2569 SmallVector<CCValAssign, 16> RVLocs;
2570 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2571 return CCInfo.CheckReturn(Outs, RetCC_X86);
2572}
2573
2574const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2575 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2576 return ScratchRegs;
2577}
2578
2579/// Lowers masks values (v*i1) to the local register values
2580/// \returns DAG node after lowering to register type
2581static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2582 const SDLoc &Dl, SelectionDAG &DAG) {
2583 EVT ValVT = ValArg.getValueType();
2584
2585 if (ValVT == MVT::v1i1)
2586 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2587 DAG.getIntPtrConstant(0, Dl));
2588
2589 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2590 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2591 // Two stage lowering might be required
2592 // bitcast: v8i1 -> i8 / v16i1 -> i16
2593 // anyextend: i8 -> i32 / i16 -> i32
2594 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2595 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2596 if (ValLoc == MVT::i32)
2597 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2598 return ValToCopy;
2599 }
2600
2601 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2602 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2603 // One stage lowering is required
2604 // bitcast: v32i1 -> i32 / v64i1 -> i64
2605 return DAG.getBitcast(ValLoc, ValArg);
2606 }
2607
2608 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2609}
2610
2611/// Breaks v64i1 value into two registers and adds the new node to the DAG
2612static void Passv64i1ArgInRegs(
2613 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2614 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, CCValAssign &VA,
2615 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2616 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")((Subtarget.hasBWI() && "Expected AVX512BW target!") ?
static_cast<void> (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2616, __PRETTY_FUNCTION__))
;
2617 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2617, __PRETTY_FUNCTION__))
;
2618 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2618, __PRETTY_FUNCTION__))
;
2619 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2620, __PRETTY_FUNCTION__))
2620 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2620, __PRETTY_FUNCTION__))
;
2621
2622 // Before splitting the value we cast it to i64
2623 Arg = DAG.getBitcast(MVT::i64, Arg);
2624
2625 // Splitting the value into two i32 types
2626 SDValue Lo, Hi;
2627 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2628 DAG.getConstant(0, Dl, MVT::i32));
2629 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2630 DAG.getConstant(1, Dl, MVT::i32));
2631
2632 // Attach the two i32 types into corresponding registers
2633 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2634 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2635}
2636
2637SDValue
2638X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2639 bool isVarArg,
2640 const SmallVectorImpl<ISD::OutputArg> &Outs,
2641 const SmallVectorImpl<SDValue> &OutVals,
2642 const SDLoc &dl, SelectionDAG &DAG) const {
2643 MachineFunction &MF = DAG.getMachineFunction();
2644 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2645
2646 // In some cases we need to disable registers from the default CSR list.
2647 // For example, when they are used for argument passing.
2648 bool ShouldDisableCalleeSavedRegister =
2649 CallConv == CallingConv::X86_RegCall ||
2650 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2651
2652 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2653 report_fatal_error("X86 interrupts may not return any value");
2654
2655 SmallVector<CCValAssign, 16> RVLocs;
2656 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2657 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2658
2659 SDValue Flag;
2660 SmallVector<SDValue, 6> RetOps;
2661 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2662 // Operand #1 = Bytes To Pop
2663 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2664 MVT::i32));
2665
2666 // Copy the result values into the output registers.
2667 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2668 ++I, ++OutsIndex) {
2669 CCValAssign &VA = RVLocs[I];
2670 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2670, __PRETTY_FUNCTION__))
;
2671
2672 // Add the register to the CalleeSaveDisableRegs list.
2673 if (ShouldDisableCalleeSavedRegister)
2674 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2675
2676 SDValue ValToCopy = OutVals[OutsIndex];
2677 EVT ValVT = ValToCopy.getValueType();
2678
2679 // Promote values to the appropriate types.
2680 if (VA.getLocInfo() == CCValAssign::SExt)
2681 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2682 else if (VA.getLocInfo() == CCValAssign::ZExt)
2683 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2684 else if (VA.getLocInfo() == CCValAssign::AExt) {
2685 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2686 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2687 else
2688 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2689 }
2690 else if (VA.getLocInfo() == CCValAssign::BCvt)
2691 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2692
2693 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2694, __PRETTY_FUNCTION__))
2694 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2694, __PRETTY_FUNCTION__))
;
2695
2696 // If this is x86-64, and we disabled SSE, we can't return FP values,
2697 // or SSE or MMX vectors.
2698 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2699 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2700 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2701 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2702 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2703 } else if (ValVT == MVT::f64 &&
2704 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2705 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2706 // llvm-gcc has never done it right and no one has noticed, so this
2707 // should be OK for now.
2708 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2709 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2710 }
2711
2712 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2713 // the RET instruction and handled by the FP Stackifier.
2714 if (VA.getLocReg() == X86::FP0 ||
2715 VA.getLocReg() == X86::FP1) {
2716 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2717 // change the value to the FP stack register class.
2718 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2719 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2720 RetOps.push_back(ValToCopy);
2721 // Don't emit a copytoreg.
2722 continue;
2723 }
2724
2725 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2726 // which is returned in RAX / RDX.
2727 if (Subtarget.is64Bit()) {
2728 if (ValVT == MVT::x86mmx) {
2729 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2730 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2731 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2732 ValToCopy);
2733 // If we don't have SSE2 available, convert to v4f32 so the generated
2734 // register is legal.
2735 if (!Subtarget.hasSSE2())
2736 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2737 }
2738 }
2739 }
2740
2741 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2742
2743 if (VA.needsCustom()) {
2744 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2745, __PRETTY_FUNCTION__))
2745 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2745, __PRETTY_FUNCTION__))
;
2746
2747 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RegsToPass, VA, RVLocs[++I],
2748 Subtarget);
2749
2750 assert(2 == RegsToPass.size() &&((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2751, __PRETTY_FUNCTION__))
2751 "Expecting two registers after Pass64BitArgInRegs")((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2751, __PRETTY_FUNCTION__))
;
2752
2753 // Add the second register to the CalleeSaveDisableRegs list.
2754 if (ShouldDisableCalleeSavedRegister)
2755 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2756 } else {
2757 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2758 }
2759
2760 // Add nodes to the DAG and add the values into the RetOps list
2761 for (auto &Reg : RegsToPass) {
2762 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2763 Flag = Chain.getValue(1);
2764 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2765 }
2766 }
2767
2768 // Swift calling convention does not require we copy the sret argument
2769 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2770
2771 // All x86 ABIs require that for returning structs by value we copy
2772 // the sret argument into %rax/%eax (depending on ABI) for the return.
2773 // We saved the argument into a virtual register in the entry block,
2774 // so now we copy the value out and into %rax/%eax.
2775 //
2776 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2777 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2778 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2779 // either case FuncInfo->setSRetReturnReg() will have been called.
2780 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2781 // When we have both sret and another return value, we should use the
2782 // original Chain stored in RetOps[0], instead of the current Chain updated
2783 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2784
2785 // For the case of sret and another return value, we have
2786 // Chain_0 at the function entry
2787 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2788 // If we use Chain_1 in getCopyFromReg, we will have
2789 // Val = getCopyFromReg(Chain_1)
2790 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2791
2792 // getCopyToReg(Chain_0) will be glued together with
2793 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2794 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2795 // Data dependency from Unit B to Unit A due to usage of Val in
2796 // getCopyToReg(Chain_1, Val)
2797 // Chain dependency from Unit A to Unit B
2798
2799 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2800 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2801 getPointerTy(MF.getDataLayout()));
2802
2803 unsigned RetValReg
2804 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2805 X86::RAX : X86::EAX;
2806 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2807 Flag = Chain.getValue(1);
2808
2809 // RAX/EAX now acts like a return value.
2810 RetOps.push_back(
2811 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2812
2813 // Add the returned register to the CalleeSaveDisableRegs list.
2814 if (ShouldDisableCalleeSavedRegister)
2815 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2816 }
2817
2818 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2819 const MCPhysReg *I =
2820 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2821 if (I) {
2822 for (; *I; ++I) {
2823 if (X86::GR64RegClass.contains(*I))
2824 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2825 else
2826 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2826)
;
2827 }
2828 }
2829
2830 RetOps[0] = Chain; // Update chain.
2831
2832 // Add the flag if we have it.
2833 if (Flag.getNode())
2834 RetOps.push_back(Flag);
2835
2836 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2837 if (CallConv == CallingConv::X86_INTR)
2838 opcode = X86ISD::IRET;
2839 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2840}
2841
2842bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2843 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2844 return false;
2845
2846 SDValue TCChain = Chain;
2847 SDNode *Copy = *N->use_begin();
2848 if (Copy->getOpcode() == ISD::CopyToReg) {
2849 // If the copy has a glue operand, we conservatively assume it isn't safe to
2850 // perform a tail call.
2851 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2852 return false;
2853 TCChain = Copy->getOperand(0);
2854 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2855 return false;
2856
2857 bool HasRet = false;
2858 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2859 UI != UE; ++UI) {
2860 if (UI->getOpcode() != X86ISD::RET_FLAG)
2861 return false;
2862 // If we are returning more than one value, we can definitely
2863 // not make a tail call see PR19530
2864 if (UI->getNumOperands() > 4)
2865 return false;
2866 if (UI->getNumOperands() == 4 &&
2867 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2868 return false;
2869 HasRet = true;
2870 }
2871
2872 if (!HasRet)
2873 return false;
2874
2875 Chain = TCChain;
2876 return true;
2877}
2878
2879EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2880 ISD::NodeType ExtendKind) const {
2881 MVT ReturnMVT = MVT::i32;
2882
2883 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2884 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2885 // The ABI does not require i1, i8 or i16 to be extended.
2886 //
2887 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2888 // always extending i8/i16 return values, so keep doing that for now.
2889 // (PR26665).
2890 ReturnMVT = MVT::i8;
2891 }
2892
2893 EVT MinVT = getRegisterType(Context, ReturnMVT);
2894 return VT.bitsLT(MinVT) ? MinVT : VT;
2895}
2896
2897/// Reads two 32 bit registers and creates a 64 bit mask value.
2898/// \param VA The current 32 bit value that need to be assigned.
2899/// \param NextVA The next 32 bit value that need to be assigned.
2900/// \param Root The parent DAG node.
2901/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2902/// glue purposes. In the case the DAG is already using
2903/// physical register instead of virtual, we should glue
2904/// our new SDValue to InFlag SDvalue.
2905/// \return a new SDvalue of size 64bit.
2906static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2907 SDValue &Root, SelectionDAG &DAG,
2908 const SDLoc &Dl, const X86Subtarget &Subtarget,
2909 SDValue *InFlag = nullptr) {
2910 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2910, __PRETTY_FUNCTION__))
;
2911 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2911, __PRETTY_FUNCTION__))
;
2912 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2913, __PRETTY_FUNCTION__))
2913 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2913, __PRETTY_FUNCTION__))
;
2914 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2915, __PRETTY_FUNCTION__))
2915 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2915, __PRETTY_FUNCTION__))
;
2916 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2917, __PRETTY_FUNCTION__))
2917 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2917, __PRETTY_FUNCTION__))
;
2918
2919 SDValue Lo, Hi;
2920 SDValue ArgValueLo, ArgValueHi;
2921
2922 MachineFunction &MF = DAG.getMachineFunction();
2923 const TargetRegisterClass *RC = &X86::GR32RegClass;
2924
2925 // Read a 32 bit value from the registers.
2926 if (nullptr == InFlag) {
2927 // When no physical register is present,
2928 // create an intermediate virtual register.
2929 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2930 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2931 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2932 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2933 } else {
2934 // When a physical register is available read the value from it and glue
2935 // the reads together.
2936 ArgValueLo =
2937 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2938 *InFlag = ArgValueLo.getValue(2);
2939 ArgValueHi =
2940 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2941 *InFlag = ArgValueHi.getValue(2);
2942 }
2943
2944 // Convert the i32 type into v32i1 type.
2945 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2946
2947 // Convert the i32 type into v32i1 type.
2948 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2949
2950 // Concatenate the two values together.
2951 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2952}
2953
2954/// The function will lower a register of various sizes (8/16/32/64)
2955/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2956/// \returns a DAG node contains the operand after lowering to mask type.
2957static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2958 const EVT &ValLoc, const SDLoc &Dl,
2959 SelectionDAG &DAG) {
2960 SDValue ValReturned = ValArg;
2961
2962 if (ValVT == MVT::v1i1)
2963 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2964
2965 if (ValVT == MVT::v64i1) {
2966 // In 32 bit machine, this case is handled by getv64i1Argument
2967 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2967, __PRETTY_FUNCTION__))
;
2968 // In 64 bit machine, There is no need to truncate the value only bitcast
2969 } else {
2970 MVT maskLen;
2971 switch (ValVT.getSimpleVT().SimpleTy) {
2972 case MVT::v8i1:
2973 maskLen = MVT::i8;
2974 break;
2975 case MVT::v16i1:
2976 maskLen = MVT::i16;
2977 break;
2978 case MVT::v32i1:
2979 maskLen = MVT::i32;
2980 break;
2981 default:
2982 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 2982)
;
2983 }
2984
2985 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2986 }
2987 return DAG.getBitcast(ValVT, ValReturned);
2988}
2989
2990/// Lower the result values of a call into the
2991/// appropriate copies out of appropriate physical registers.
2992///
2993SDValue X86TargetLowering::LowerCallResult(
2994 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2995 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2996 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2997 uint32_t *RegMask) const {
2998
2999 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3000 // Assign locations to each value returned by this call.
3001 SmallVector<CCValAssign, 16> RVLocs;
3002 bool Is64Bit = Subtarget.is64Bit();
3003 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3004 *DAG.getContext());
3005 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3006
3007 // Copy all of the result registers out of their specified physreg.
3008 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3009 ++I, ++InsIndex) {
3010 CCValAssign &VA = RVLocs[I];
3011 EVT CopyVT = VA.getLocVT();
3012
3013 // In some calling conventions we need to remove the used registers
3014 // from the register mask.
3015 if (RegMask) {
3016 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3017 SubRegs.isValid(); ++SubRegs)
3018 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3019 }
3020
3021 // If this is x86-64, and we disabled SSE, we can't return FP values
3022 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
3023 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
3024 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3025 if (VA.getLocReg() == X86::XMM1)
3026 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3027 else
3028 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3029 } else if (CopyVT == MVT::f64 &&
3030 (Is64Bit && !Subtarget.hasSSE2())) {
3031 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3032 if (VA.getLocReg() == X86::XMM1)
3033 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3034 else
3035 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3036 }
3037
3038 // If we prefer to use the value in xmm registers, copy it out as f80 and
3039 // use a truncate to move it from fp stack reg to xmm reg.
3040 bool RoundAfterCopy = false;
3041 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3042 isScalarFPTypeInSSEReg(VA.getValVT())) {
3043 if (!Subtarget.hasX87())
3044 report_fatal_error("X87 register return with X87 disabled");
3045 CopyVT = MVT::f80;
3046 RoundAfterCopy = (CopyVT != VA.getLocVT());
3047 }
3048
3049 SDValue Val;
3050 if (VA.needsCustom()) {
3051 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3052, __PRETTY_FUNCTION__))
3052 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3052, __PRETTY_FUNCTION__))
;
3053 Val =
3054 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3055 } else {
3056 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3057 .getValue(1);
3058 Val = Chain.getValue(0);
3059 InFlag = Chain.getValue(2);
3060 }
3061
3062 if (RoundAfterCopy)
3063 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3064 // This truncation won't change the value.
3065 DAG.getIntPtrConstant(1, dl));
3066
3067 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
3068 if (VA.getValVT().isVector() &&
3069 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3070 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3071 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3072 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3073 } else
3074 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3075 }
3076
3077 InVals.push_back(Val);
3078 }
3079
3080 return Chain;
3081}
3082
3083//===----------------------------------------------------------------------===//
3084// C & StdCall & Fast Calling Convention implementation
3085//===----------------------------------------------------------------------===//
// The StdCall calling convention appears to be the standard for many Windows
// API routines and related code. It differs from the C calling convention
// only slightly: the callee, not the caller, cleans up the stack. Symbols are
// also decorated in a special way :) It doesn't support any vector arguments.
// For info on the fast calling convention, see the Fast Calling Convention
// (tail call) implementation, LowerX86_32FastCCCallTo.
3092
3093/// CallIsStructReturn - Determines whether a call uses struct return
3094/// semantics.
3095enum StructReturnType {
3096 NotStructReturn,
3097 RegStructReturn,
3098 StackStructReturn
3099};
3100static StructReturnType
3101callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
3102 if (Outs.empty())
3103 return NotStructReturn;
3104
3105 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
3106 if (!Flags.isSRet())
3107 return NotStructReturn;
3108 if (Flags.isInReg() || IsMCU)
3109 return RegStructReturn;
3110 return StackStructReturn;
3111}
3112
3113/// Determines whether a function uses struct return semantics.
3114static StructReturnType
3115argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
3116 if (Ins.empty())
3117 return NotStructReturn;
3118
3119 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
3120 if (!Flags.isSRet())
3121 return NotStructReturn;
3122 if (Flags.isInReg() || IsMCU)
3123 return RegStructReturn;
3124 return StackStructReturn;
3125}
3126
3127/// Make a copy of an aggregate at address specified by "Src" to address
3128/// "Dst" with size and alignment information specified by the specific
3129/// parameter attribute. The copy will be passed as a byval function parameter.
3130static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3131 SDValue Chain, ISD::ArgFlagsTy Flags,
3132 SelectionDAG &DAG, const SDLoc &dl) {
3133 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
3134
3135 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
3136 /*isVolatile*/false, /*AlwaysInline=*/true,
3137 /*isTailCall*/false,
3138 MachinePointerInfo(), MachinePointerInfo());
3139}
3140
3141/// Return true if the calling convention is one that we can guarantee TCO for.
3142static bool canGuaranteeTCO(CallingConv::ID CC) {
3143 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3144 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3145 CC == CallingConv::HHVM || CC == CallingConv::Tail);
3146}
3147
3148/// Return true if we might ever do TCO for calls with this calling convention.
3149static bool mayTailCallThisCC(CallingConv::ID CC) {
3150 switch (CC) {
3151 // C calling conventions:
3152 case CallingConv::C:
3153 case CallingConv::Win64:
3154 case CallingConv::X86_64_SysV:
3155 // Callee pop conventions:
3156 case CallingConv::X86_ThisCall:
3157 case CallingConv::X86_StdCall:
3158 case CallingConv::X86_VectorCall:
3159 case CallingConv::X86_FastCall:
3160 // Swift:
3161 case CallingConv::Swift:
3162 return true;
3163 default:
3164 return canGuaranteeTCO(CC);
3165 }
3166}
3167
3168/// Return true if the function is being made into a tailcall target by
3169/// changing its ABI.
3170static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3171 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
3172}
3173
3174bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3175 if (!CI->isTailCall())
3176 return false;
3177
3178 ImmutableCallSite CS(CI);
3179 CallingConv::ID CalleeCC = CS.getCallingConv();
3180 if (!mayTailCallThisCC(CalleeCC))
3181 return false;
3182
3183 return true;
3184}
3185
3186SDValue
3187X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3188 const SmallVectorImpl<ISD::InputArg> &Ins,
3189 const SDLoc &dl, SelectionDAG &DAG,
3190 const CCValAssign &VA,
3191 MachineFrameInfo &MFI, unsigned i) const {
3192 // Create the nodes corresponding to a load from this parameter slot.
3193 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3194 bool AlwaysUseMutable = shouldGuaranteeTCO(
3195 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3196 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3197 EVT ValVT;
3198 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3199
3200 // If value is passed by pointer we have address passed instead of the value
3201 // itself. No need to extend if the mask value and location share the same
3202 // absolute size.
3203 bool ExtendedInMem =
3204 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3205 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3206
3207 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3208 ValVT = VA.getLocVT();
3209 else
3210 ValVT = VA.getValVT();
3211
3212 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3213 // changed with more analysis.
3214 // In case of tail call optimization mark all arguments mutable. Since they
3215 // could be overwritten by lowering of arguments in case of a tail call.
3216 if (Flags.isByVal()) {
3217 unsigned Bytes = Flags.getByValSize();
3218 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3219
3220 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3221 // can be improved with deeper analysis.
3222 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3223 /*isAliased=*/true);
3224 return DAG.getFrameIndex(FI, PtrVT);
3225 }
3226
3227 // This is an argument in memory. We might be able to perform copy elision.
3228 // If the argument is passed directly in memory without any extension, then we
3229 // can perform copy elision. Large vector types, for example, may be passed
3230 // indirectly by pointer.
3231 if (Flags.isCopyElisionCandidate() &&
3232 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
3233 EVT ArgVT = Ins[i].ArgVT;
3234 SDValue PartAddr;
3235 if (Ins[i].PartOffset == 0) {
3236 // If this is a one-part value or the first part of a multi-part value,
3237 // create a stack object for the entire argument value type and return a
3238 // load from our portion of it. This assumes that if the first part of an
3239 // argument is in memory, the rest will also be in memory.
3240 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3241 /*IsImmutable=*/false);
3242 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3243 return DAG.getLoad(
3244 ValVT, dl, Chain, PartAddr,
3245 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3246 } else {
3247 // This is not the first piece of an argument in memory. See if there is
3248 // already a fixed stack object including this offset. If so, assume it
3249 // was created by the PartOffset == 0 branch above and create a load from
3250 // the appropriate offset into it.
3251 int64_t PartBegin = VA.getLocMemOffset();
3252 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3253 int FI = MFI.getObjectIndexBegin();
3254 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3255 int64_t ObjBegin = MFI.getObjectOffset(FI);
3256 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3257 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3258 break;
3259 }
3260 if (MFI.isFixedObjectIndex(FI)) {
3261 SDValue Addr =
3262 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3263 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3264 return DAG.getLoad(
3265 ValVT, dl, Chain, Addr,
3266 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3267 Ins[i].PartOffset));
3268 }
3269 }
3270 }
3271
3272 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3273 VA.getLocMemOffset(), isImmutable);
3274
3275 // Set SExt or ZExt flag.
3276 if (VA.getLocInfo() == CCValAssign::ZExt) {
3277 MFI.setObjectZExt(FI, true);
3278 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3279 MFI.setObjectSExt(FI, true);
3280 }
3281
3282 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3283 SDValue Val = DAG.getLoad(
3284 ValVT, dl, Chain, FIN,
3285 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3286 return ExtendedInMem
3287 ? (VA.getValVT().isVector()
3288 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3289 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3290 : Val;
3291}
3292
3293// FIXME: Get this from tablegen.
3294static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3295 const X86Subtarget &Subtarget) {
3296 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3296, __PRETTY_FUNCTION__))
;
3297
3298 if (Subtarget.isCallingConvWin64(CallConv)) {
3299 static const MCPhysReg GPR64ArgRegsWin64[] = {
3300 X86::RCX, X86::RDX, X86::R8, X86::R9
3301 };
3302 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3303 }
3304
3305 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3306 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3307 };
3308 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3309}
3310
3311// FIXME: Get this from tablegen.
// Returns the XMM registers that may carry arguments on a 64-bit subtarget.
// The result is empty (None) for Win64 calling conventions, and also when
// soft-float is in use, the function has the NoImplicitFloat attribute, or
// the subtarget lacks SSE1. Otherwise it is XMM0..XMM7.
// Asserts that the subtarget is 64-bit and that soft-float and
// NoImplicitFloat are not both set.
3312static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3313 CallingConv::ID CallConv,
3314 const X86Subtarget &Subtarget) {
3315 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3315, __PRETTY_FUNCTION__))
;
3316 if (Subtarget.isCallingConvWin64(CallConv)) {
3317 // The XMM registers which might contain var arg parameters are shadowed
3318 // in their paired GPR. So we only need to save the GPR to their home
3319 // slots.
3320 // TODO: __vectorcall will change this.
3321 return None;
3322 }
3323
// Non-Win64 path: decide from the function attribute and subtarget features
// whether any XMM argument registers exist at all.
3324 const Function &F = MF.getFunction();
3325 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3326 bool isSoftFloat = Subtarget.useSoftFloat();
3327 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3328, __PRETTY_FUNCTION__))
3328 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3328, __PRETTY_FUNCTION__))
;
3329 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3330 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3331 // registers.
3332 return None;
3333
// The static array outlives the call, so the returned ArrayRef stays valid.
3334 static const MCPhysReg XMMArgRegs64Bit[] = {
3335 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3336 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3337 };
3338 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3339}
3340
3341#ifndef NDEBUG
// Helper compiled only when NDEBUG is not defined; within this chunk it is
// used solely inside assert() conditions. Returns true when ArgLocs is
// ordered by non-decreasing getValNo(), i.e. no entry has a smaller value
// number than the one before it.
3342static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3343 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
3344 [](const CCValAssign &A, const CCValAssign &B) -> bool {
3345 return A.getValNo() < B.getValNo();
3346 });
3347}
3348#endif
3349
/// Lowers the incoming formal arguments of the current function into DAG
/// values.
///
/// Runs the CC_X86 argument analysis over \p Ins, then for every assigned
/// location either copies the value out of a live-in register or obtains it
/// through LowerMemArgument, appending the resulting SDValue to \p InVals.
/// Afterwards it saves the sret pointer in a virtual register (skipped for the
/// Swift calling convention), creates the vararg frame index and the 64-bit
/// register save area when va_start is used, forwards registers for vararg
/// functions containing musttail calls, and records the callee-pop byte count
/// and argument stack size in X86MachineFunctionInfo.
///
/// \returns the updated chain.
3350SDValue X86TargetLowering::LowerFormalArguments(
3351 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3352 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3353 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3354 MachineFunction &MF = DAG.getMachineFunction();
3355 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3356 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3357
3358 const Function &F = MF.getFunction();
// An externally visible "main" on Cygwin/MinGW targets gets a forced frame
// pointer.
3359 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3360 F.getName() == "main")
3361 FuncInfo->setForceFramePointer(true);
3362
3363 MachineFrameInfo &MFI = MF.getFrameInfo();
3364 bool Is64Bit = Subtarget.is64Bit();
3365 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3366
3367 assert(((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3369, __PRETTY_FUNCTION__))
3368 !(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3369, __PRETTY_FUNCTION__))
3369 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3369, __PRETTY_FUNCTION__))
;
3370
3371 // Assign locations to all of the incoming arguments.
3372 SmallVector<CCValAssign, 16> ArgLocs;
3373 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3374
3375 // Allocate shadow area for Win64.
3376 if (IsWin64)
3377 CCInfo.AllocateStack(32, 8);
3378
3379 CCInfo.AnalyzeArguments(Ins, CC_X86);
3380
3381 // In vectorcall calling convention a second pass is required for the HVA
3382 // types.
3383 if (CallingConv::X86_VectorCall == CallConv) {
3384 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3385 }
3386
3387 // The next loop assumes that the locations are in the same order of the
3388 // input arguments.
3389 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3390, __PRETTY_FUNCTION__))
3390 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3390, __PRETTY_FUNCTION__))
;
3391
// I walks ArgLocs while InsIndex walks Ins. They advance together except in
// the v64i1 custom case below, which consumes two ArgLocs entries (++I) for a
// single Ins entry.
3392 SDValue ArgValue;
3393 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3394 ++I, ++InsIndex) {
3395 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3395, __PRETTY_FUNCTION__))
;
3396 CCValAssign &VA = ArgLocs[I];
3397
3398 if (VA.isRegLoc()) {
3399 EVT RegVT = VA.getLocVT();
3400 if (VA.needsCustom()) {
3401 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3403, __PRETTY_FUNCTION__))
3402 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3403, __PRETTY_FUNCTION__))
3403 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3403, __PRETTY_FUNCTION__))
;
3404
3405 // v64i1 values, in regcall calling convention, that are
3406 // compiled to 32 bit arch, are split up into two registers.
3407 ArgValue =
3408 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3409 } else {
// Pick the register class that matches the location type; any type not
// listed here hits llvm_unreachable.
3410 const TargetRegisterClass *RC;
3411 if (RegVT == MVT::i8)
3412 RC = &X86::GR8RegClass;
3413 else if (RegVT == MVT::i16)
3414 RC = &X86::GR16RegClass;
3415 else if (RegVT == MVT::i32)
3416 RC = &X86::GR32RegClass;
3417 else if (Is64Bit && RegVT == MVT::i64)
3418 RC = &X86::GR64RegClass;
3419 else if (RegVT == MVT::f32)
3420 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3421 else if (RegVT == MVT::f64)
3422 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3423 else if (RegVT == MVT::f80)
3424 RC = &X86::RFP80RegClass;
3425 else if (RegVT == MVT::f128)
3426 RC = &X86::VR128RegClass;
3427 else if (RegVT.is512BitVector())
3428 RC = &X86::VR512RegClass;
3429 else if (RegVT.is256BitVector())
3430 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3431 else if (RegVT.is128BitVector())
3432 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3433 else if (RegVT == MVT::x86mmx)
3434 RC = &X86::VR64RegClass;
3435 else if (RegVT == MVT::v1i1)
3436 RC = &X86::VK1RegClass;
3437 else if (RegVT == MVT::v8i1)
3438 RC = &X86::VK8RegClass;
3439 else if (RegVT == MVT::v16i1)
3440 RC = &X86::VK16RegClass;
3441 else if (RegVT == MVT::v32i1)
3442 RC = &X86::VK32RegClass;
3443 else if (RegVT == MVT::v64i1)
3444 RC = &X86::VK64RegClass;
3445 else
3446 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3446)
;
3447
3448 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3449 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3450 }
3451
3452 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3453 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3454 // right size.
3455 if (VA.getLocInfo() == CCValAssign::SExt)
3456 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3457 DAG.getValueType(VA.getValVT()));
3458 else if (VA.getLocInfo() == CCValAssign::ZExt)
3459 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3460 DAG.getValueType(VA.getValVT()));
3461 else if (VA.getLocInfo() == CCValAssign::BCvt)
3462 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3463
3464 if (VA.isExtInLoc()) {
3465 // Handle MMX values passed in XMM regs.
3466 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3467 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3468 else if (VA.getValVT().isVector() &&
3469 VA.getValVT().getScalarType() == MVT::i1 &&
3470 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3471 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3472 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3473 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3474 } else
3475 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3476 }
3477 } else {
3478 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3478, __PRETTY_FUNCTION__))
;
3479 ArgValue =
3480 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3481 }
3482
3483 // If value is passed via pointer - do a load.
// NOTE(review): Ins is indexed with I (the ArgLocs index) here, whereas the
// memory path above passes InsIndex. The two differ once a v64i1 split has
// executed ++I -- confirm whether Ins[InsIndex] is intended.
3484 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3485 ArgValue =
3486 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3487
3488 InVals.push_back(ArgValue);
3489 }
3490
// Second pass over Ins: capture the first sret argument (if any) in a virtual
// register and stop. InVals[I] is read with the Ins index.
3491 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3492 // Swift calling convention does not require we copy the sret argument
3493 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3494 if (CallConv == CallingConv::Swift)
3495 continue;
3496
3497 // All x86 ABIs require that for returning structs by value we copy the
3498 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3499 // the argument into a virtual register so that we can access it from the
3500 // return points.
3501 if (Ins[I].Flags.isSRet()) {
3502 unsigned Reg = FuncInfo->getSRetReturnReg();
3503 if (!Reg) {
3504 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3505 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3506 FuncInfo->setSRetReturnReg(Reg);
3507 }
3508 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3509 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3510 break;
3511 }
3512 }
3513
3514 unsigned StackSize = CCInfo.getNextStackOffset();
3515 // Align stack specially for tail calls.
3516 if (shouldGuaranteeTCO(CallConv,
3517 MF.getTarget().Options.GuaranteedTailCallOpt))
3518 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3519
3520 // If the function takes variable number of arguments, make a frame index for
3521 // the start of the first vararg value... for expansion of llvm.va_start. We
3522 // can skip this if there are no va_start calls.
3523 if (MFI.hasVAStart() &&
3524 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3525 CallConv != CallingConv::X86_ThisCall))) {
3526 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3527 }
3528
3529 // Figure out if XMM registers are in use.
3530 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3532, __PRETTY_FUNCTION__))
3531 F.hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3532, __PRETTY_FUNCTION__))
3532 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3532, __PRETTY_FUNCTION__))
;
3533
3534 // 64-bit calling conventions support varargs and register parameters, so we
3535 // have to do extra work to spill them in the prologue.
3536 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3537 // Find the first unallocated argument registers.
3538 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3539 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3540 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3541 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3542 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3543, __PRETTY_FUNCTION__))
3543 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3543, __PRETTY_FUNCTION__))
;
3544
3545 // Gather all the live in physical registers.
3546 SmallVector<SDValue, 6> LiveGPRs;
3547 SmallVector<SDValue, 8> LiveXMMRegs;
3548 SDValue ALVal;
3549 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3550 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3551 LiveGPRs.push_back(
3552 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3553 }
// AL is made live-in whenever XMM argument registers exist; its value is
// passed to the VASTART_SAVE_XMM_REGS node built further down.
3554 if (!ArgXMMs.empty()) {
3555 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3556 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3557 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3558 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3559 LiveXMMRegs.push_back(
3560 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3561 }
3562 }
3563
3564 if (IsWin64) {
3565 // Get to the caller-allocated home save location. Add 8 to account
3566 // for the return address.
3567 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3568 FuncInfo->setRegSaveFrameIndex(
3569 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3570 // Fixup to set vararg frame on shadow area (4 x i64).
3571 if (NumIntRegs < 4)
3572 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3573 } else {
3574 // For X86-64, if there are vararg parameters that are passed via
3575 // registers, then we must store them to their spots on the stack so
3576 // they may be loaded by dereferencing the result of va_next.
// The save area holds every GPR slot (8 bytes each) followed by every XMM
// slot (16 bytes each); the GP/FP offsets skip the registers already
// allocated to named arguments.
3577 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3578 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3579 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3580 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3581 }
3582
3583 // Store the integer parameter registers.
3584 SmallVector<SDValue, 8> MemOps;
3585 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3586 getPointerTy(DAG.getDataLayout()));
3587 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3588 for (SDValue Val : LiveGPRs) {
3589 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3590 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3591 SDValue Store =
3592 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3593 MachinePointerInfo::getFixedStack(
3594 DAG.getMachineFunction(),
3595 FuncInfo->getRegSaveFrameIndex(), Offset));
3596 MemOps.push_back(Store);
3597 Offset += 8;
3598 }
3599
3600 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3601 // Now store the XMM (fp + vector) parameter registers.
3602 SmallVector<SDValue, 12> SaveXMMOps;
3603 SaveXMMOps.push_back(Chain);
3604 SaveXMMOps.push_back(ALVal);
3605 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3606 FuncInfo->getRegSaveFrameIndex(), dl));
3607 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3608 FuncInfo->getVarArgsFPOffset(), dl));
3609 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3610 LiveXMMRegs.end());
3611 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3612 MVT::Other, SaveXMMOps));
3613 }
3614
3615 if (!MemOps.empty())
3616 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3617 }
3618
3619 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3620 // Find the largest legal vector type.
3621 MVT VecVT = MVT::Other;
3622 // FIXME: Only some x86_32 calling conventions support AVX512.
3623 if (Subtarget.useAVX512Regs() &&
3624 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3625 CallConv == CallingConv::Intel_OCL_BI)))
3626 VecVT = MVT::v16f32;
3627 else if (Subtarget.hasAVX())
3628 VecVT = MVT::v8f32;
3629 else if (Subtarget.hasSSE2())
3630 VecVT = MVT::v4f32;
3631
3632 // We forward some GPRs and some vector types.
3633 SmallVector<MVT, 2> RegParmTypes;
3634 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3635 RegParmTypes.push_back(IntVT);
3636 if (VecVT != MVT::Other)
3637 RegParmTypes.push_back(VecVT);
3638
3639 // Compute the set of forwarded registers. The rest are scratch.
3640 SmallVectorImpl<ForwardedRegister> &Forwards =
3641 FuncInfo->getForwardedMustTailRegParms();
3642 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3643
3644 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3645 if (Is64Bit && !IsWin64 && !CCInfo.isAllocated(X86::AL)) {
3646 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3647 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3648 }
3649
3650 // Copy all forwards from physical to virtual registers.
3651 for (ForwardedRegister &FR : Forwards) {
3652 // FIXME: Can we use a less constrained schedule?
3653 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, FR.VReg, FR.VT);
3654 FR.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(FR.VT));
3655 Chain = DAG.getCopyToReg(Chain, dl, FR.VReg, RegVal);
3656 }
3657 }
3658
3659 // Some CCs need callee pop.
3660 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3661 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3662 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3663 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3664 // X86 interrupts must pop the error code (and the alignment padding) if
3665 // present.
3666 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3667 } else {
3668 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3669 // If this is an sret function, the return should pop the hidden pointer.
3670 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3671 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3672 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3673 FuncInfo->setBytesToPopOnReturn(4);
3674 }
3675
3676 if (!Is64Bit) {
3677 // RegSaveFrameIndex is X86-64 only.
// NOTE(review): 0xAAAAAAA looks like a deliberate placeholder for an index
// that must not be used -- confirm.
3678 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3679 if (CallConv == CallingConv::X86_FastCall ||
3680 CallConv == CallingConv::X86_ThisCall)
3681 // fastcc functions can't have varargs.
3682 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3683 }
3684
3685 FuncInfo->setArgumentStackSize(StackSize);
3686
3687 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3688 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3689 if (Personality == EHPersonality::CoreCLR) {
3690 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3690, __PRETTY_FUNCTION__))
;
3691 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3692 // that we'd prefer this slot be allocated towards the bottom of the frame
3693 // (i.e. near the stack pointer after allocating the frame). Every
3694 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3695 // offset from the bottom of this and each funclet's frame must be the
3696 // same, so the size of funclets' (mostly empty) frames is dictated by
3697 // how far this slot is from the bottom (since they allocate just enough
3698 // space to accommodate holding this slot at the correct offset).
3699 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3700 EHInfo->PSPSymFrameIdx = PSPSymFI;
3701 }
3702 }
3703
// For regcall, or functions carrying "no_caller_saved_registers", every
// live-in register is passed to disableCalleeSavedRegister.
3704 if (CallConv == CallingConv::X86_RegCall ||
3705 F.hasFnAttribute("no_caller_saved_registers")) {
3706 MachineRegisterInfo &MRI = MF.getRegInfo();
3707 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3708 MRI.disableCalleeSavedRegister(Pair.first);
3709 }
3710
3711 return Chain;
3712}
3713
/// Emits the node that places outgoing argument \p Arg at its assigned memory
/// location, computed as \p StackPtr plus VA.getLocMemOffset().
///
/// For byval arguments the result of CreateCopyOfByValArgument is returned;
/// otherwise a store of \p Arg to the computed address is returned.
3714SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3715 SDValue Arg, const SDLoc &dl,
3716 SelectionDAG &DAG,
3717 const CCValAssign &VA,
3718 ISD::ArgFlagsTy Flags) const {
3719 unsigned LocMemOffset = VA.getLocMemOffset();
3720 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
// PtrOff is reused: first the constant offset, then the absolute address.
3721 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3722 StackPtr, PtrOff);
3723 if (Flags.isByVal())
3724 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3725
3726 return DAG.getStore(
3727 Chain, dl, Arg, PtrOff,
3728 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3729}
3730
3731/// Emit a load of return address if tail call
3732/// optimization is performed and it is required.
///
/// On return, \p OutRetAddr holds the loaded return-address value and the
/// function result is value #1 of that same load node, which the visible
/// caller assigns to its chain.
/// Note: \p IsTailCall, \p Is64Bit and \p FPDiff are not read in this body;
/// the load is emitted unconditionally, so callers must do the gating.
3733SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3734 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3735 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3736 // Adjust the Return address stack slot.
3737 EVT VT = getPointerTy(DAG.getDataLayout());
3738 OutRetAddr = getReturnAddressFrameIndex(DAG);
3739
3740 // Load the "old" Return address.
3741 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3742 return SDValue(OutRetAddr.getNode(), 1);
3743}
3744
3745/// Emit a store of the return address if tail call
3746/// optimization is performed and it is required (FPDiff!=0).
///
/// When \p FPDiff is zero, \p Chain is returned unchanged. Otherwise a fixed
/// stack object of \p SlotSize bytes is created at offset FPDiff - SlotSize,
/// \p RetAddrFrIdx is stored into it, and the store's chain is returned.
3747static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3748 SDValue Chain, SDValue RetAddrFrIdx,
3749 EVT PtrVT, unsigned SlotSize,
3750 int FPDiff, const SDLoc &dl) {
3751 // Store the return address to the appropriate stack slot.
3752 if (!FPDiff) return Chain;
3753 // Calculate the new stack slot for the return address.
// FPDiff is widened to int64_t before the unsigned SlotSize is subtracted.
3754 int NewReturnAddrFI =
3755 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3756 false);
3757 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3758 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3759 MachinePointerInfo::getFixedStack(
3760 DAG.getMachineFunction(), NewReturnAddrFI));
3761 return Chain;
3762}
3763
3764/// Returns a vector_shuffle mask for an movs{s|d}, movd
3765/// operation of specified width.
///
/// Concretely, builds the mask <NumElems, 1, 2, ..., NumElems-1> and returns
/// the node produced by DAG.getVectorShuffle(VT, dl, V1, V2, Mask).
/// Presumably index NumElems selects element 0 of \p V2 under the usual
/// two-input shuffle-mask convention -- confirm against getVectorShuffle.
3766static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3767 SDValue V2) {
3768 unsigned NumElems = VT.getVectorNumElements();
3769 SmallVector<int, 8> Mask;
3770 Mask.push_back(NumElems);
3771 for (unsigned i = 1; i != NumElems; ++i)
3772 Mask.push_back(i);
3773 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3774}
3775
3776SDValue
3777X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3778 SmallVectorImpl<SDValue> &InVals) const {
3779 SelectionDAG &DAG = CLI.DAG;
3780 SDLoc &dl = CLI.DL;
3781 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3782 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3783 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3784 SDValue Chain = CLI.Chain;
3785 SDValue Callee = CLI.Callee;
3786 CallingConv::ID CallConv = CLI.CallConv;
3787 bool &isTailCall = CLI.IsTailCall;
3788 bool isVarArg = CLI.IsVarArg;
3789
3790 MachineFunction &MF = DAG.getMachineFunction();
3791 bool Is64Bit = Subtarget.is64Bit();
3792 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3793 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3794 bool IsSibcall = false;
3795 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
3796 CallConv == CallingConv::Tail;
3797 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3798 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3799 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3800 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3801 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3802 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
3803 bool HasNoCfCheck =
3804 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3805 const Module *M = MF.getMMI().getModule();
3806 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3807
3808 MachineFunction::CallSiteInfo CSInfo;
3809
3810 if (CallConv == CallingConv::X86_INTR)
3811 report_fatal_error("X86 interrupts may not be called directly");
3812
3813 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
3814 // If we are using a GOT, disable tail calls to external symbols with
3815 // default visibility. Tail calling such a symbol requires using a GOT
3816 // relocation, which forces early binding of the symbol. This breaks code
3817 // that require lazy function symbol resolution. Using musttail or
3818 // GuaranteedTailCallOpt will override this.
3819 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3820 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3821 G->getGlobal()->hasDefaultVisibility()))
3822 isTailCall = false;
3823 }
3824
3825 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3826 if (IsMustTail) {
3827 // Force this to be a tail call. The verifier rules are enough to ensure
3828 // that we can lower this successfully without moving the return address
3829 // around.
3830 isTailCall = true;
3831 } else if (isTailCall) {
3832 // Check if it's really possible to do a tail call.
3833 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3834 isVarArg, SR != NotStructReturn,
3835 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3836 Outs, OutVals, Ins, DAG);
3837
3838 // Sibcalls are automatically detected tailcalls which do not require
3839 // ABI changes.
3840 if (!IsGuaranteeTCO && isTailCall)
3841 IsSibcall = true;
3842
3843 if (isTailCall)
3844 ++NumTailCalls;
3845 }
3846
3847 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3848, __PRETTY_FUNCTION__))
3848 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3848, __PRETTY_FUNCTION__))
;
3849
3850 // Analyze operands of the call, assigning locations to each operand.
3851 SmallVector<CCValAssign, 16> ArgLocs;
3852 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3853
3854 // Allocate shadow area for Win64.
3855 if (IsWin64)
3856 CCInfo.AllocateStack(32, 8);
3857
3858 CCInfo.AnalyzeArguments(Outs, CC_X86);
3859
3860 // In vectorcall calling convention a second pass is required for the HVA
3861 // types.
3862 if (CallingConv::X86_VectorCall == CallConv) {
3863 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3864 }
3865
3866 // Get a count of how many bytes are to be pushed on the stack.
3867 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3868 if (IsSibcall)
3869 // This is a sibcall. The memory operands are available in caller's
3870 // own caller's stack.
3871 NumBytes = 0;
3872 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
3873 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3874
3875 int FPDiff = 0;
3876 if (isTailCall && !IsSibcall && !IsMustTail) {
3877 // Lower arguments at fp - stackoffset + fpdiff.
3878 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3879
3880 FPDiff = NumBytesCallerPushed - NumBytes;
3881
3882 // Set the delta of movement of the returnaddr stackslot.
3883 // But only set if delta is greater than previous delta.
3884 if (FPDiff < X86Info->getTCReturnAddrDelta())
3885 X86Info->setTCReturnAddrDelta(FPDiff);
3886 }
3887
3888 unsigned NumBytesToPush = NumBytes;
3889 unsigned NumBytesToPop = NumBytes;
3890
3891 // If we have an inalloca argument, all stack space has already been allocated
3892 // for us and is right at the top of the stack. We don't support multiple
3893 // arguments passed in memory when using inalloca.
3894 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3895 NumBytesToPush = 0;
3896 if (!ArgLocs.back().isMemLoc())
3897 report_fatal_error("cannot use inalloca attribute on a register "
3898 "parameter");
3899 if (ArgLocs.back().getLocMemOffset() != 0)
3900 report_fatal_error("any parameter with the inalloca attribute must be "
3901 "the only memory argument");
3902 }
3903
3904 if (!IsSibcall && !IsMustTail)
3905 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3906 NumBytes - NumBytesToPush, dl);
3907
3908 SDValue RetAddrFrIdx;
3909 // Load return address for tail calls.
3910 if (isTailCall && FPDiff)
3911 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3912 Is64Bit, FPDiff, dl);
3913
3914 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3915 SmallVector<SDValue, 8> MemOpChains;
3916 SDValue StackPtr;
3917
3918 // The next loop assumes that the locations are in the same order of the
3919 // input arguments.
3920 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3921, __PRETTY_FUNCTION__))
3921 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3921, __PRETTY_FUNCTION__))
;
3922
3923 // Walk the register/memloc assignments, inserting copies/loads. In the case
3924 // of tail call optimization arguments are handle later.
3925 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3926 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3927 ++I, ++OutIndex) {
3928 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3928, __PRETTY_FUNCTION__))
;
3929 // Skip inalloca arguments, they have already been written.
3930 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3931 if (Flags.isInAlloca())
3932 continue;
3933
3934 CCValAssign &VA = ArgLocs[I];
3935 EVT RegVT = VA.getLocVT();
3936 SDValue Arg = OutVals[OutIndex];
3937 bool isByVal = Flags.isByVal();
3938
3939 // Promote the value if needed.
3940 switch (VA.getLocInfo()) {
3941 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3941)
;
3942 case CCValAssign::Full: break;
3943 case CCValAssign::SExt:
3944 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3945 break;
3946 case CCValAssign::ZExt:
3947 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3948 break;
3949 case CCValAssign::AExt:
3950 if (Arg.getValueType().isVector() &&
3951 Arg.getValueType().getVectorElementType() == MVT::i1)
3952 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3953 else if (RegVT.is128BitVector()) {
3954 // Special case: passing MMX values in XMM registers.
3955 Arg = DAG.getBitcast(MVT::i64, Arg);
3956 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3957 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3958 } else
3959 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3960 break;
3961 case CCValAssign::BCvt:
3962 Arg = DAG.getBitcast(RegVT, Arg);
3963 break;
3964 case CCValAssign::Indirect: {
3965 if (isByVal) {
3966 // Memcpy the argument to a temporary stack slot to prevent
3967 // the caller from seeing any modifications the callee may make
3968 // as guaranteed by the `byval` attribute.
3969 int FrameIdx = MF.getFrameInfo().CreateStackObject(
3970 Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
3971 false);
3972 SDValue StackSlot =
3973 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
3974 Chain =
3975 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
3976 // From now on treat this as a regular pointer
3977 Arg = StackSlot;
3978 isByVal = false;
3979 } else {
3980 // Store the argument.
3981 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3982 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3983 Chain = DAG.getStore(
3984 Chain, dl, Arg, SpillSlot,
3985 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3986 Arg = SpillSlot;
3987 }
3988 break;
3989 }
3990 }
3991
3992 if (VA.needsCustom()) {
3993 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3994, __PRETTY_FUNCTION__))
3994 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 3994, __PRETTY_FUNCTION__))
;
3995 // Split v64i1 value into two registers
3996 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
3997 } else if (VA.isRegLoc()) {
3998 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3999 const TargetOptions &Options = DAG.getTarget().Options;
4000 if (Options.EnableDebugEntryValues)
4001 CSInfo.emplace_back(VA.getLocReg(), I);
4002 if (isVarArg && IsWin64) {
4003 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4004 // shadow reg if callee is a varargs function.
4005 unsigned ShadowReg = 0;
4006 switch (VA.getLocReg()) {
4007 case X86::XMM0: ShadowReg = X86::RCX; break;
4008 case X86::XMM1: ShadowReg = X86::RDX; break;
4009 case X86::XMM2: ShadowReg = X86::R8; break;
4010 case X86::XMM3: ShadowReg = X86::R9; break;
4011 }
4012 if (ShadowReg)
4013 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4014 }
4015 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4016 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4016, __PRETTY_FUNCTION__))
;
4017 if (!StackPtr.getNode())
4018 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4019 getPointerTy(DAG.getDataLayout()));
4020 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4021 dl, DAG, VA, Flags));
4022 }
4023 }
4024
4025 if (!MemOpChains.empty())
4026 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4027
4028 if (Subtarget.isPICStyleGOT()) {
4029 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4030 // GOT pointer.
4031 if (!isTailCall) {
4032 RegsToPass.push_back(std::make_pair(
4033 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4034 getPointerTy(DAG.getDataLayout()))));
4035 } else {
4036 // If we are tail calling and generating PIC/GOT style code load the
4037 // address of the callee into ECX. The value in ecx is used as target of
4038 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4039 // for tail calls on PIC/GOT architectures. Normally we would just put the
4040 // address of GOT into ebx and then call target@PLT. But for tail calls
4041 // ebx would be restored (since ebx is callee saved) before jumping to the
4042 // target@PLT.
4043
4044 // Note: The actual moving to ECX is done further down.
4045 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4046 if (G && !G->getGlobal()->hasLocalLinkage() &&
4047 G->getGlobal()->hasDefaultVisibility())
4048 Callee = LowerGlobalAddress(Callee, DAG);
4049 else if (isa<ExternalSymbolSDNode>(Callee))
4050 Callee = LowerExternalSymbol(Callee, DAG);
4051 }
4052 }
4053
4054 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
4055 // From AMD64 ABI document:
4056 // For calls that may call functions that use varargs or stdargs
4057 // (prototype-less calls or calls to functions containing ellipsis (...) in
4058 // the declaration) %al is used as hidden argument to specify the number
4059 // of SSE registers used. The contents of %al do not need to match exactly
4060 // the number of registers, but must be an ubound on the number of SSE
4061 // registers used and is in the range 0 - 8 inclusive.
4062
4063 // Count the number of XMM registers allocated.
4064 static const MCPhysReg XMMArgRegs[] = {
4065 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4066 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4067 };
4068 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4069 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4070, __PRETTY_FUNCTION__))
4070 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4070, __PRETTY_FUNCTION__))
;
4071
4072 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
4073 DAG.getConstant(NumXMMRegs, dl,
4074 MVT::i8)));
4075 }
4076
4077 if (isVarArg && IsMustTail) {
4078 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4079 for (const auto &F : Forwards) {
4080 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4081 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
4082 }
4083 }
4084
4085 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4086 // don't need this because the eligibility check rejects calls that require
4087 // shuffling arguments passed in memory.
4088 if (!IsSibcall && isTailCall) {
4089 // Force all the incoming stack arguments to be loaded from the stack
4090 // before any new outgoing arguments are stored to the stack, because the
4091 // outgoing stack slots may alias the incoming argument stack slots, and
4092 // the alias isn't otherwise explicit. This is slightly more conservative
4093 // than necessary, because it means that each store effectively depends
4094 // on every argument instead of just those arguments it would clobber.
4095 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4096
4097 SmallVector<SDValue, 8> MemOpChains2;
4098 SDValue FIN;
4099 int FI = 0;
4100 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4101 ++I, ++OutsIndex) {
4102 CCValAssign &VA = ArgLocs[I];
4103
4104 if (VA.isRegLoc()) {
4105 if (VA.needsCustom()) {
4106 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4107, __PRETTY_FUNCTION__))
4107 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4107, __PRETTY_FUNCTION__))
;
4108 // This means that we are in special case where one argument was
4109 // passed through two register locations - Skip the next location
4110 ++I;
4111 }
4112
4113 continue;
4114 }
4115
4116 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4116, __PRETTY_FUNCTION__))
;
4117 SDValue Arg = OutVals[OutsIndex];
4118 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4119 // Skip inalloca arguments. They don't require any work.
4120 if (Flags.isInAlloca())
4121 continue;
4122 // Create frame index.
4123 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4124 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4125 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4126 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4127
4128 if (Flags.isByVal()) {
4129 // Copy relative to framepointer.
4130 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4131 if (!StackPtr.getNode())
4132 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4133 getPointerTy(DAG.getDataLayout()));
4134 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4135 StackPtr, Source);
4136
4137 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4138 ArgChain,
4139 Flags, DAG, dl));
4140 } else {
4141 // Store relative to framepointer.
4142 MemOpChains2.push_back(DAG.getStore(
4143 ArgChain, dl, Arg, FIN,
4144 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4145 }
4146 }
4147
4148 if (!MemOpChains2.empty())
4149 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4150
4151 // Store the return address to the appropriate stack slot.
4152 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4153 getPointerTy(DAG.getDataLayout()),
4154 RegInfo->getSlotSize(), FPDiff, dl);
4155 }
4156
4157 // Build a sequence of copy-to-reg nodes chained together with token chain
4158 // and flag operands which copy the outgoing args into registers.
4159 SDValue InFlag;
4160 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4161 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4162 RegsToPass[i].second, InFlag);
4163 InFlag = Chain.getValue(1);
4164 }
4165
4166 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4167 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4167, __PRETTY_FUNCTION__))
;
4168 // In the 64-bit large code model, we have to make all calls
4169 // through a register, since the call instruction's 32-bit
4170 // pc-relative offset may not be large enough to hold the whole
4171 // address.
4172 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4173 Callee->getOpcode() == ISD::ExternalSymbol) {
4174 // Lower direct calls to global addresses and external symbols. Setting
4175 // ForCall to true here has the effect of removing WrapperRIP when possible
4176 // to allow direct calls to be selected without first materializing the
4177 // address into a register.
4178 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4179 } else if (Subtarget.isTarget64BitILP32() &&
4180 Callee->getValueType(0) == MVT::i32) {
4181 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4182 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4183 }
4184
4185 // Returns a chain & a flag for retval copy to use.
4186 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4187 SmallVector<SDValue, 8> Ops;
4188
4189 if (!IsSibcall && isTailCall && !IsMustTail) {
4190 Chain = DAG.getCALLSEQ_END(Chain,
4191 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4192 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4193 InFlag = Chain.getValue(1);
4194 }
4195
4196 Ops.push_back(Chain);
4197 Ops.push_back(Callee);
4198
4199 if (isTailCall)
4200 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
4201
4202 // Add argument registers to the end of the list so that they are known live
4203 // into the call.
4204 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4205 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4206 RegsToPass[i].second.getValueType()));
4207
4208 // Add a register mask operand representing the call-preserved registers.
4209 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
4210 // set X86_INTR calling convention because it has the same CSR mask
4211 // (same preserved registers).
4212 const uint32_t *Mask = RegInfo->getCallPreservedMask(
4213 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
4214 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4214, __PRETTY_FUNCTION__))
;
4215
4216 // If this is an invoke in a 32-bit function using a funclet-based
4217 // personality, assume the function clobbers all registers. If an exception
4218 // is thrown, the runtime will not restore CSRs.
4219 // FIXME: Model this more precisely so that we can register allocate across
4220 // the normal edge and spill and fill across the exceptional edge.
4221 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
4222 const Function &CallerFn = MF.getFunction();
4223 EHPersonality Pers =
4224 CallerFn.hasPersonalityFn()
4225 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4226 : EHPersonality::Unknown;
4227 if (isFuncletEHPersonality(Pers))
4228 Mask = RegInfo->getNoPreservedMask();
4229 }
4230
4231 // Define a new register mask from the existing mask.
4232 uint32_t *RegMask = nullptr;
4233
4234 // In some calling conventions we need to remove the used physical registers
4235 // from the reg mask.
4236 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4237 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4238
4239 // Allocate a new Reg Mask and copy Mask.
4240 RegMask = MF.allocateRegMask();
4241 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4242 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4243
4244 // Make sure all sub registers of the argument registers are reset
4245 // in the RegMask.
4246 for (auto const &RegPair : RegsToPass)
4247 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4248 SubRegs.isValid(); ++SubRegs)
4249 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4250
4251 // Create the RegMask Operand according to our updated mask.
4252 Ops.push_back(DAG.getRegisterMask(RegMask));
4253 } else {
4254 // Create the RegMask Operand according to the static mask.
4255 Ops.push_back(DAG.getRegisterMask(Mask));
4256 }
4257
4258 if (InFlag.getNode())
4259 Ops.push_back(InFlag);
4260
4261 if (isTailCall) {
4262 // We used to do:
4263 //// If this is the first return lowered for this function, add the regs
4264 //// to the liveout set for the function.
4265 // This isn't right, although it's probably harmless on x86; liveouts
4266 // should be computed from returns not tail calls. Consider a void
4267 // function making a tail call to a function returning int.
4268 MF.getFrameInfo().setHasTailCall();
4269 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4270 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4271 return Ret;
4272 }
4273
4274 if (HasNoCfCheck && IsCFProtectionSupported) {
4275 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4276 } else {
4277 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4278 }
4279 InFlag = Chain.getValue(1);
4280 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4281
4282 // Save heapallocsite metadata.
4283 if (CLI.CS)
4284 if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite"))
4285 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4286
4287 // Create the CALLSEQ_END node.
4288 unsigned NumBytesForCalleeToPop;
4289 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4290 DAG.getTarget().Options.GuaranteedTailCallOpt))
4291 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4292 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4293 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4294 SR == StackStructReturn)
4295 // If this is a call to a struct-return function, the callee
4296 // pops the hidden struct pointer, so we have to push it back.
4297 // This is common for Darwin/X86, Linux & Mingw32 targets.
4298 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4299 NumBytesForCalleeToPop = 4;
4300 else
4301 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4302
4303 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
4304 // No need to reset the stack after the call if the call doesn't return. To
4305 // make the MI verify, we'll pretend the callee does it for us.
4306 NumBytesForCalleeToPop = NumBytes;
4307 }
4308
4309 // Returns a flag for retval copy to use.
4310 if (!IsSibcall) {
4311 Chain = DAG.getCALLSEQ_END(Chain,
4312 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4313 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4314 true),
4315 InFlag, dl);
4316 InFlag = Chain.getValue(1);
4317 }
4318
4319 // Handle result values, copying them out of physregs into vregs that we
4320 // return.
4321 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4322 InVals, RegMask);
4323}
4324
4325//===----------------------------------------------------------------------===//
4326// Fast Calling Convention (tail call) implementation
4327//===----------------------------------------------------------------------===//
4328
4329// Like stdcall (the callee cleans up the arguments), except that ECX is
4330// reserved for storing the address of the tail-called function. Only 2
4331// registers are free for argument passing (inreg). Tail call optimization is
4332// performed provided:
4333// * tailcallopt is enabled
4334// * caller/callee are fastcc
4335// On X86_64 architecture with GOT-style position independent code only local
4336// (within module) calls are supported at the moment.
4337// To keep the stack aligned according to platform abi the function
4338// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4339// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
4340// If a tail called function callee has more arguments than the caller the
4341// caller needs to make sure that there is room to move the RETADDR to. This is
4342// achieved by reserving an area the size of the argument delta right after the
4343// original RETADDR, but before the saved framepointer or the spilled registers
4344// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4345// stack layout:
4346// arg1
4347// arg2
4348// RETADDR
4349// [ new RETADDR
4350// move area ]
4351// (possible EBP)
4352// ESI
4353// EDI
4354// local1 ..
4355
4356/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4357/// requirement.
4358unsigned
4359X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4360 SelectionDAG &DAG) const {
4361 const Align StackAlignment(Subtarget.getFrameLowering()->getStackAlignment());
4362 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4363 assert(StackSize % SlotSize == 0 &&((StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize"
) ? static_cast<void> (0) : __assert_fail ("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4364, __PRETTY_FUNCTION__))
4364 "StackSize must be a multiple of SlotSize")((StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize"
) ? static_cast<void> (0) : __assert_fail ("StackSize % SlotSize == 0 && \"StackSize must be a multiple of SlotSize\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4364, __PRETTY_FUNCTION__))
;
4365 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4366}
4367
4368/// Return true if the given stack call argument is already available in the
4369/// same position (relatively) of the caller's incoming argument stack.
4370static
4371bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4372 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4373 const X86InstrInfo *TII, const CCValAssign &VA) {
4374 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4375
4376 for (;;) {
4377 // Look through nodes that don't alter the bits of the incoming value.
4378 unsigned Op = Arg.getOpcode();
4379 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4380 Arg = Arg.getOperand(0);
4381 continue;
4382 }
4383 if (Op == ISD::TRUNCATE) {
4384 const SDValue &TruncInput = Arg.getOperand(0);
4385 if (TruncInput.getOpcode() == ISD::AssertZext &&
4386 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4387 Arg.getValueType()) {
4388 Arg = TruncInput.getOperand(0);
4389 continue;
4390 }
4391 }
4392 break;
4393 }
4394
4395 int FI = INT_MAX2147483647;
4396 if (Arg.getOpcode() == ISD::CopyFromReg) {
4397 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4398 if (!Register::isVirtualRegister(VR))
4399 return false;
4400 MachineInstr *Def = MRI->getVRegDef(VR);
4401 if (!Def)
4402 return false;
4403 if (!Flags.isByVal()) {
4404 if (!TII->isLoadFromStackSlot(*Def, FI))
4405 return false;
4406 } else {
4407 unsigned Opcode = Def->getOpcode();
4408 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4409 Opcode == X86::LEA64_32r) &&
4410 Def->getOperand(1).isFI()) {
4411 FI = Def->getOperand(1).getIndex();
4412 Bytes = Flags.getByValSize();
4413 } else
4414 return false;
4415 }
4416 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4417 if (Flags.isByVal())
4418 // ByVal argument is passed in as a pointer but it's now being
4419 // dereferenced. e.g.
4420 // define @foo(%struct.X* %A) {
4421 // tail call @bar(%struct.X* byval %A)
4422 // }
4423 return false;
4424 SDValue Ptr = Ld->getBasePtr();
4425 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4426 if (!FINode)
4427 return false;
4428 FI = FINode->getIndex();
4429 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4430 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4431 FI = FINode->getIndex();
4432 Bytes = Flags.getByValSize();
4433 } else
4434 return false;
4435
4436 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4436, __PRETTY_FUNCTION__))
;
4437 if (!MFI.isFixedObjectIndex(FI))
4438 return false;
4439
4440 if (Offset != MFI.getObjectOffset(FI))
4441 return false;
4442
4443 // If this is not byval, check that the argument stack object is immutable.
4444 // inalloca and argument copy elision can create mutable argument stack
4445 // objects. Byval objects can be mutated, but a byval call intends to pass the
4446 // mutated memory.
4447 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4448 return false;
4449
4450 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4451 // If the argument location is wider than the argument type, check that any
4452 // extension flags match.
4453 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4454 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4455 return false;
4456 }
4457 }
4458
4459 return Bytes == MFI.getObjectSize(FI);
4460}
4461
4462/// Check whether the call is eligible for tail call optimization. Targets
4463/// that want to do tail call optimization should implement this function.
4464bool X86TargetLowering::IsEligibleForTailCallOptimization(
4465 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4466 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4467 const SmallVectorImpl<ISD::OutputArg> &Outs,
4468 const SmallVectorImpl<SDValue> &OutVals,
4469 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4470 if (!mayTailCallThisCC(CalleeCC))
4471 return false;
4472
4473 // If -tailcallopt is specified, make fastcc functions tail-callable.
4474 MachineFunction &MF = DAG.getMachineFunction();
4475 const Function &CallerF = MF.getFunction();
4476
4477 // If the function return type is x86_fp80 and the callee return type is not,
4478 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4479 // perform a tailcall optimization here.
4480 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4481 return false;
4482
4483 CallingConv::ID CallerCC = CallerF.getCallingConv();
4484 bool CCMatch = CallerCC == CalleeCC;
4485 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4486 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4487 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4488 CalleeCC == CallingConv::Tail;
4489
4490 // Win64 functions have extra shadow space for argument homing. Don't do the
4491 // sibcall if the caller and callee have mismatched expectations for this
4492 // space.
4493 if (IsCalleeWin64 != IsCallerWin64)
4494 return false;
4495
4496 if (IsGuaranteeTCO) {
4497 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4498 return true;
4499 return false;
4500 }
4501
4502 // Look for obvious safe cases to perform tail call optimization that do not
4503 // require ABI changes. This is what gcc calls sibcall.
4504
4505 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4506 // emit a special epilogue.
4507 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4508 if (RegInfo->needsStackRealignment(MF))
4509 return false;
4510
4511 // Also avoid sibcall optimization if either caller or callee uses struct
4512 // return semantics.
4513 if (isCalleeStructRet || isCallerStructRet)
4514 return false;
4515
4516 // Do not sibcall optimize vararg calls unless all arguments are passed via
4517 // registers.
4518 LLVMContext &C = *DAG.getContext();
4519 if (isVarArg && !Outs.empty()) {
4520 // Optimizing for varargs on Win64 is unlikely to be safe without
4521 // additional testing.
4522 if (IsCalleeWin64 || IsCallerWin64)
4523 return false;
4524
4525 SmallVector<CCValAssign, 16> ArgLocs;
4526 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4527
4528 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4529 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4530 if (!ArgLocs[i].isRegLoc())
4531 return false;
4532 }
4533
4534 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4535 // stack. Therefore, if it's not used by the call it is not safe to optimize
4536 // this into a sibcall.
4537 bool Unused = false;
4538 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4539 if (!Ins[i].Used) {
4540 Unused = true;
4541 break;
4542 }
4543 }
4544 if (Unused) {
4545 SmallVector<CCValAssign, 16> RVLocs;
4546 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4547 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4548 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4549 CCValAssign &VA = RVLocs[i];
4550 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4551 return false;
4552 }
4553 }
4554
4555 // Check that the call results are passed in the same way.
4556 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4557 RetCC_X86, RetCC_X86))
4558 return false;
4559 // The callee has to preserve all registers the caller needs to preserve.
4560 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4561 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4562 if (!CCMatch) {
4563 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4564 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4565 return false;
4566 }
4567
4568 unsigned StackArgsSize = 0;
4569
4570 // If the callee takes no arguments then go on to check the results of the
4571 // call.
4572 if (!Outs.empty()) {
4573 // Check if stack adjustment is needed. For now, do not do this if any
4574 // argument is passed on the stack.
4575 SmallVector<CCValAssign, 16> ArgLocs;
4576 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4577
4578 // Allocate shadow area for Win64
4579 if (IsCalleeWin64)
4580 CCInfo.AllocateStack(32, 8);
4581
4582 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4583 StackArgsSize = CCInfo.getNextStackOffset();
4584
4585 if (CCInfo.getNextStackOffset()) {
4586 // Check if the arguments are already laid out in the right way as
4587 // the caller's fixed stack objects.
4588 MachineFrameInfo &MFI = MF.getFrameInfo();
4589 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4590 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4591 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4592 CCValAssign &VA = ArgLocs[i];
4593 SDValue Arg = OutVals[i];
4594 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4595 if (VA.getLocInfo() == CCValAssign::Indirect)
4596 return false;
4597 if (!VA.isRegLoc()) {
4598 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4599 MFI, MRI, TII, VA))
4600 return false;
4601 }
4602 }
4603 }
4604
4605 bool PositionIndependent = isPositionIndependent();
4606 // If the tailcall address may be in a register, then make sure it's
4607 // possible to register allocate for it. In 32-bit, the call address can
4608 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4609 // callee-saved registers are restored. These happen to be the same
4610 // registers used to pass 'inreg' arguments so watch out for those.
4611 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4612 !isa<ExternalSymbolSDNode>(Callee)) ||
4613 PositionIndependent)) {
4614 unsigned NumInRegs = 0;
4615 // In PIC we need an extra register to formulate the address computation
4616 // for the callee.
4617 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4618
4619 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4620 CCValAssign &VA = ArgLocs[i];
4621 if (!VA.isRegLoc())
4622 continue;
4623 Register Reg = VA.getLocReg();
4624 switch (Reg) {
4625 default: break;
4626 case X86::EAX: case X86::EDX: case X86::ECX:
4627 if (++NumInRegs == MaxInRegs)
4628 return false;
4629 break;
4630 }
4631 }
4632 }
4633
4634 const MachineRegisterInfo &MRI = MF.getRegInfo();
4635 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4636 return false;
4637 }
4638
4639 bool CalleeWillPop =
4640 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4641 MF.getTarget().Options.GuaranteedTailCallOpt);
4642
4643 if (unsigned BytesToPop =
4644 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4645 // If we have bytes to pop, the callee must pop them.
4646 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4647 if (!CalleePopMatches)
4648 return false;
4649 } else if (CalleeWillPop && StackArgsSize > 0) {
4650 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4651 return false;
4652 }
4653
4654 return true;
4655}
4656
/// Forwards \p funcInfo and \p libInfo to X86::createFastISel and returns
/// whatever that factory returns.
4657FastISel *
4658X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4659 const TargetLibraryInfo *libInfo) const {
4660 return X86::createFastISel(funcInfo, libInfo);
4661}
4662
4663//===----------------------------------------------------------------------===//
4664// Other Lowering Hooks
4665//===----------------------------------------------------------------------===//
4666
/// Return true if \p Op reports a single use (hasOneUse) and its node is a
/// normal load according to ISD::isNormalLoad.
4667static bool MayFoldLoad(SDValue Op) {
4668 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4669}
4670
/// Return true if \p Op reports a single use (hasOneUse) and the first user in
/// its node's use list is a normal store according to ISD::isNormalStore.
4671static bool MayFoldIntoStore(SDValue Op) {
4672 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4673}
4674
/// Return true if \p Op reports a single use (hasOneUse) and the first user in
/// its node's use list has opcode ISD::ZERO_EXTEND; false otherwise.
4675static bool MayFoldIntoZeroExtend(SDValue Op) {
4676 if (Op.hasOneUse()) {
4677 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4678 return (ISD::ZERO_EXTEND == Opcode);
4679 }
4680 return false;
4681}
4682
/// Return true if \p Opcode is one of the X86ISD shuffle opcodes enumerated in
/// the switch below; every other opcode yields false.
4683static bool isTargetShuffle(unsigned Opcode) {
4684 switch(Opcode) {
4685 default: return false;
4686 case X86ISD::BLENDI:
4687 case X86ISD::PSHUFB:
4688 case X86ISD::PSHUFD:
4689 case X86ISD::PSHUFHW:
4690 case X86ISD::PSHUFLW:
4691 case X86ISD::SHUFP:
4692 case X86ISD::INSERTPS:
4693 case X86ISD::EXTRQI:
4694 case X86ISD::INSERTQI:
4695 case X86ISD::PALIGNR:
4696 case X86ISD::VSHLDQ:
4697 case X86ISD::VSRLDQ:
4698 case X86ISD::MOVLHPS:
4699 case X86ISD::MOVHLPS:
4700 case X86ISD::MOVSHDUP:
4701 case X86ISD::MOVSLDUP:
4702 case X86ISD::MOVDDUP:
4703 case X86ISD::MOVSS:
4704 case X86ISD::MOVSD:
4705 case X86ISD::UNPCKL:
4706 case X86ISD::UNPCKH:
4707 case X86ISD::VBROADCAST:
4708 case X86ISD::VPERMILPI:
4709 case X86ISD::VPERMILPV:
4710 case X86ISD::VPERM2X128:
4711 case X86ISD::SHUF128:
4712 case X86ISD::VPERMIL2:
4713 case X86ISD::VPERMI:
4714 case X86ISD::VPPERM:
4715 case X86ISD::VPERMV:
4716 case X86ISD::VPERMV3:
4717 case X86ISD::VZEXT_MOVL:
4718 return true;
4719 }
4720}
4721
/// Return true if \p Opcode is one of the opcodes enumerated below: a subset
/// of the X86ISD shuffles plus the OR/AND/ANDNP "faux" shuffles. Every other
/// opcode yields false.
// NOTE(review): going by the name, these are presumably the shuffles whose
// mask is a variable operand rather than an immediate - confirm.
4722static bool isTargetShuffleVariableMask(unsigned Opcode) {
4723 switch (Opcode) {
4724 default: return false;
4725 // Target Shuffles.
4726 case X86ISD::PSHUFB:
4727 case X86ISD::VPERMILPV:
4728 case X86ISD::VPERMIL2:
4729 case X86ISD::VPPERM:
4730 case X86ISD::VPERMV:
4731 case X86ISD::VPERMV3:
4732 return true;
4733 // 'Faux' Target Shuffles.
4734 case ISD::OR:
4735 case ISD::AND:
4736 case X86ISD::ANDNP:
4737 return true;
4738 }
4739}
4740
/// Return a frame-index node (of pointer type) for the return-address slot.
/// The index is cached in X86MachineFunctionInfo; when the cached value is 0
/// a fixed frame object of one slot is created at offset -SlotSize and the
/// new index is stored back before it is used.
4741SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4742 MachineFunction &MF = DAG.getMachineFunction();
4743 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4744 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4745 int ReturnAddrIndex = FuncInfo->getRAIndex();
4746
// An index of 0 is treated as "not created yet".
4747 if (ReturnAddrIndex == 0) {
4748 // Set up a frame object for the return address.
4749 unsigned SlotSize = RegInfo->getSlotSize();
4750 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4751 -(int64_t)SlotSize,
4752 false);
4753 FuncInfo->setRAIndex(ReturnAddrIndex);
4754 }
4755
4756 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4757}
4758
/// Return true if \p Offset may be used as a displacement under code model
/// \p M. The offset must fit in a signed 32-bit immediate. Without a symbolic
/// displacement that is the only requirement; with one, only the Small model
/// (Offset below 16MB) and the Kernel model (non-negative Offset) are
/// accepted.
4759bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4760 bool hasSymbolicDisplacement) {
4761 // Offset should fit into 32 bit immediate field.
4762 if (!isInt<32>(Offset))
4763 return false;
4764
4765 // If we don't have a symbolic displacement - we don't have any extra
4766 // restrictions.
4767 if (!hasSymbolicDisplacement)
4768 return true;
4769
4770 // FIXME: Some tweaks might be needed for medium code model.
4771 if (M != CodeModel::Small && M != CodeModel::Kernel)
4772 return false;
4773
4774 // For small code model we assume that the latest object is 16MB before the
4775 // end of the 31 bits boundary. We may also accept pretty large negative
4776 // constants knowing that all objects are in the positive half of address space.
4777 if (M == CodeModel::Small && Offset < 16*1024*1024)
4778 return true;
4779
4780 // For kernel code model we know that all objects reside in the negative half
4781 // of 32bits address space. We may not accept negative offsets, since they may
4782 // be just off and we may accept pretty large positive ones.
4783 if (M == CodeModel::Kernel && Offset >= 0)
4784 return true;
4785
4786 return false;
4787}
4788
4789/// Determines whether the callee is required to pop its own arguments.
4790/// Callee pop is necessary to support tail calls.
/// Returns true for a non-vararg call when shouldGuaranteeTCO accepts the
/// convention; otherwise returns !is64Bit for the stdcall, fastcall, thiscall
/// and vectorcall conventions, and false for every other convention.
4791bool X86::isCalleePop(CallingConv::ID CallingConv,
4792 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4793 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4794 // can guarantee TCO.
4795 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4796 return true;
4797
4798 switch (CallingConv) {
4799 default:
4800 return false;
4801 case CallingConv::X86_StdCall:
4802 case CallingConv::X86_FastCall:
4803 case CallingConv::X86_ThisCall:
4804 case CallingConv::X86_VectorCall:
4805 return !is64Bit;
4806 }
4807}
4808
4809/// Return true if the condition is a signed comparison operation.
/// COND_G/GE/L/LE yield true; COND_E/NE/B/A/BE/AE yield false. Any other
/// condition code reaches llvm_unreachable.
4810static bool isX86CCSigned(unsigned X86CC) {
4811 switch (X86CC) {
4812 default:
4813 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4813)
;
4814 case X86::COND_E:
4815 case X86::COND_NE:
4816 case X86::COND_B:
4817 case X86::COND_A:
4818 case X86::COND_BE:
4819 case X86::COND_AE:
4820 return false;
4821 case X86::COND_G:
4822 case X86::COND_GE:
4823 case X86::COND_L:
4824 case X86::COND_LE:
4825 return true;
4826 }
4827}
4828
/// Map an integer ISD::CondCode to the X86 condition code listed in the
/// switch below (signed predicates to G/GE/L/LE, unsigned ones to A/AE/B/BE,
/// equality to E/NE). Any other predicate reaches llvm_unreachable.
4829static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4830 switch (SetCCOpcode) {
4831 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4831)
;
4832 case ISD::SETEQ: return X86::COND_E;
4833 case ISD::SETGT: return X86::COND_G;
4834 case ISD::SETGE: return X86::COND_GE;
4835 case ISD::SETLT: return X86::COND_L;
4836 case ISD::SETLE: return X86::COND_LE;
4837 case ISD::SETNE: return X86::COND_NE;
4838 case ISD::SETULT: return X86::COND_B;
4839 case ISD::SETUGT: return X86::COND_A;
4840 case ISD::SETULE: return X86::COND_BE;
4841 case ISD::SETUGE: return X86::COND_AE;
4842 }
4843}
4844
4845/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4846/// condition code, returning the condition code and the LHS/RHS of the
4847/// comparison to make.
/// \p LHS and \p RHS are in/out parameters: RHS may be replaced by a zero
/// constant (integer path) and the two may be swapped (FP path). The FP path
/// returns X86::COND_INVALID for SETOEQ and SETUNE.
4848static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4849 bool isFP, SDValue &LHS, SDValue &RHS,
4850 SelectionDAG &DAG) {
4851 if (!isFP) {
// Integer compares against a few special constants map to sign-flag tests
// or to a cheaper compare against zero.
4852 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4853 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4854 // X > -1 -> compare X against 0, jump on !sign.
4855 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4856 return X86::COND_NS;
4857 }
4858 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4859 // X < 0 -> compare X against 0 (RHS is already 0), jump on sign.
4860 return X86::COND_S;
4861 }
4862 if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
4863 // X >= 0 -> compare X against 0 (RHS is already 0), jump on !sign.
4864 return X86::COND_NS;
4865 }
4866 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
4867 // X < 1 -> X <= 0
4868 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4869 return X86::COND_LE;
4870 }
4871 }
4872
4873 return TranslateIntegerX86CC(SetCCOpcode);
4874 }
4875
4876 // First determine if it is required or is profitable to flip the operands.
4877
4878 // If LHS is a foldable load, but RHS is not, flip the condition.
4879 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4880 !ISD::isNON_EXTLoad(RHS.getNode())) {
4881 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4882 std::swap(LHS, RHS);
4883 }
4884
// These four predicates are handled by swapping the operands; SetCCOpcode is
// left unchanged, so the "flipped" rows in the switch further down pick the
// condition for the swapped operand order.
4885 switch (SetCCOpcode) {
4886 default: break;
4887 case ISD::SETOLT:
4888 case ISD::SETOLE:
4889 case ISD::SETUGT:
4890 case ISD::SETUGE:
4891 std::swap(LHS, RHS);
4892 break;
4893 }
4894
4895 // On a floating point condition, the flags are set as follows:
4896 // ZF PF CF op
4897 // 0 | 0 | 0 | X > Y
4898 // 0 | 0 | 1 | X < Y
4899 // 1 | 0 | 0 | X == Y
4900 // 1 | 1 | 1 | unordered
4901 switch (SetCCOpcode) {
4902 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 4902)
;
4903 case ISD::SETUEQ:
4904 case ISD::SETEQ: return X86::COND_E;
4905 case ISD::SETOLT: // flipped
4906 case ISD::SETOGT:
4907 case ISD::SETGT: return X86::COND_A;
4908 case ISD::SETOLE: // flipped
4909 case ISD::SETOGE:
4910 case ISD::SETGE: return X86::COND_AE;
4911 case ISD::SETUGT: // flipped
4912 case ISD::SETULT:
4913 case ISD::SETLT: return X86::COND_B;
4914 case ISD::SETUGE: // flipped
4915 case ISD::SETULE:
4916 case ISD::SETLE: return X86::COND_BE;
4917 case ISD::SETONE:
4918 case ISD::SETNE: return X86::COND_NE;
4919 case ISD::SETUO: return X86::COND_P;
4920 case ISD::SETO: return X86::COND_NP;
// No single condition code is returned for these two; callers receive
// COND_INVALID.
4921 case ISD::SETOEQ:
4922 case ISD::SETUNE: return X86::COND_INVALID;
4923 }
4924}
4925
4926/// Is there a floating point cmov for the specific X86 condition code?
4927/// Current x86 isa includes the following FP cmov instructions:
4928/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
/// Returns true for COND_B, BE, E, P, A, AE, NE and NP; false otherwise.
4929static bool hasFPCMov(unsigned X86CC) {
4930 switch (X86CC) {
4931 default:
4932 return false;
4933 case X86::COND_B:
4934 case X86::COND_BE:
4935 case X86::COND_E:
4936 case X86::COND_P:
4937 case X86::COND_A:
4938 case X86::COND_AE:
4939 case X86::COND_NE:
4940 case X86::COND_NP:
4941 return true;
4942 }
4943}
4944
4945
/// Fill \p Info with the memory-access description of intrinsic call \p I.
/// Returns false when getIntrinsicWithChain has no entry for \p Intrinsic or
/// when the entry's type is not one of the truncating-store, gather or
/// scatter kinds handled below; returns true after populating Info otherwise.
/// \p MF is not used in this body.
4946bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4947 const CallInst &I,
4948 MachineFunction &MF,
4949 unsigned Intrinsic) const {
4950
4951 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4952 if (!IntrData)
4953 return false;
4954
4955 Info.flags = MachineMemOperand::MONone;
4956 Info.offset = 0;
4957
4958 switch (IntrData->Type) {
// Truncating stores: pointer is argument 0; the memory type keeps the
// element count of argument 1 but uses the narrowed scalar type.
4959 case TRUNCATE_TO_MEM_VI8:
4960 case TRUNCATE_TO_MEM_VI16:
4961 case TRUNCATE_TO_MEM_VI32: {
4962 Info.opc = ISD::INTRINSIC_VOID;
4963 Info.ptrVal = I.getArgOperand(0);
4964 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4965 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4966 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4967 ScalarVT = MVT::i8;
4968 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4969 ScalarVT = MVT::i16;
4970 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4971 ScalarVT = MVT::i32;
4972
4973 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4974 Info.align = Align::None();
4975 Info.flags |= MachineMemOperand::MOStore;
4976 break;
4977 }
// Gathers: no single pointer value is recorded. The memory type uses the
// result's element type and the smaller of the data and index (argument 2)
// element counts.
4978 case GATHER:
4979 case GATHER_AVX2: {
4980 Info.opc = ISD::INTRINSIC_W_CHAIN;
4981 Info.ptrVal = nullptr;
4982 MVT DataVT = MVT::getVT(I.getType());
4983 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
4984 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
4985 IndexVT.getVectorNumElements());
4986 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
4987 Info.align = Align::None();
4988 Info.flags |= MachineMemOperand::MOLoad;
4989 break;
4990 }
// Scatters: same shape computation as gathers, with the data type taken
// from argument 3 and the access marked as a store.
4991 case SCATTER: {
4992 Info.opc = ISD::INTRINSIC_VOID;
4993 Info.ptrVal = nullptr;
4994 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
4995 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
4996 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
4997 IndexVT.getVectorNumElements());
4998 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
4999 Info.align = Align::None();
5000 Info.flags |= MachineMemOperand::MOStore;
5001 break;
5002 }
5003 default:
5004 return false;
5005 }
5006
5007 return true;
5008}
5009
5010/// Returns true if the target can instruction select the
5011/// specified FP immediate natively. If false, the legalizer will
5012/// materialize the FP immediate as a load from a constant pool.
/// The check is a bitwise comparison of \p Imm against each entry of
/// LegalFPImmediates; \p VT and \p ForCodeSize are not consulted here.
5013bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5014 bool ForCodeSize) const {
5015 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
5016 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
5017 return true;
5018 }
5019 return false;
5020}
5021
/// Decide whether \p Load may be narrowed. \p Load must be a simple
/// LoadSDNode (asserted). Returns false for a load through a WrapperRIP of a
/// global address flagged MO_GOTTPOFF, and for a 256/512-bit vector load with
/// more than one use whose value uses are all EXTRACT_SUBVECTOR nodes feeding
/// a single STORE; returns true otherwise. \p ExtTy and \p NewVT are not
/// consulted in this body.
5022bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5023 ISD::LoadExtType ExtTy,
5024 EVT NewVT) const {
5025 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")((cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow"
) ? static_cast<void> (0) : __assert_fail ("cast<LoadSDNode>(Load)->isSimple() && \"illegal to narrow\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5025, __PRETTY_FUNCTION__))
;
5026
5027 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5028 // relocation target a movq or addq instruction: don't let the load shrink.
// Note that for a WrapperRIP of a global address this returns immediately,
// so the vector-load heuristic below is not reached in that case.
5029 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5030 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5031 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5032 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5033
5034 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5035 // those uses are extracted directly into a store, then the extract + store
5036 // can be store-folded. Therefore, it's probably not worth splitting the load.
5037 EVT VT = Load->getValueType(0);
5038 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5039 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5040 // Skip uses of the chain value. Result 0 of the node is the load value.
5041 if (UI.getUse().getResNo() != 0)
5042 continue;
5043
5044 // If this use is not an extract + store, it's probably worth splitting.
5045 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5046 UI->use_begin()->getOpcode() != ISD::STORE)
5047 return true;
5048 }
5049 // All non-chain uses are extract + store.
5050 return false;
5051 }
5052
5053 return true;
5054}
5055
5056/// Returns true if it is beneficial to convert a load of a constant
5057/// to just the constant itself.
/// \p Ty must be an integer type (asserted). The answer depends only on its
/// width: true for 1 to 64 bits, false for 0 or more than 64 bits. \p Imm is
/// not inspected.
5058bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5059 Type *Ty) const {
5060 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5060, __PRETTY_FUNCTION__))
;
5061
5062 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5063 if (BitSize == 0 || BitSize > 64)
5064 return false;
5065 return true;
5066}
5067
/// Returns false only when all three hold: \p CmpOpVT is a floating-point
/// type other than f128, the subtarget is 64-bit LP64, and AVX is available.
/// In every other case it returns true.
5068bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5069 // If we are using XMM registers in the ABI and the condition of the select is
5070 // a floating-point compare and we have blendv or conditional move, then it is
5071 // cheaper to select instead of doing a cross-register move and creating a
5072 // load that depends on the compare result.
5073 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5074 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5075}
5076
/// Returns false for vector types when the subtarget has AVX512, and true in
/// every other case.
5077bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5078 // TODO: It might be a win to ease or lift this restriction, but the generic
5079 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5080 if (VT.isVector() && Subtarget.hasAVX512())
5081 return false;
5082
5083 return true;
5084}
5085
/// Returns true if a multiply of type \p VT by constant \p C should be
/// decomposed into shift plus add/sub. This requires \p C to be a constant
/// splat vector, ISD::MUL to be not legal for the type \p VT legalizes to,
/// and the splat value to be of the form +/-(2^N +/- 1) as tested by the
/// final expression.
5086bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5087 SDValue C) const {
5088 // TODO: We handle scalars using custom code, but generic combining could make
5089 // that unnecessary.
5090 APInt MulC;
5091 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5092 return false;
5093
5094 // Find the type this will be legalized to. Otherwise we might prematurely
5095 // convert this to shl+add/sub and then still have to type legalize those ops.
5096 // Another choice would be to defer the decision for illegal types until
5097 // after type legalization. But constant splat vectors of i64 can't make it
5098 // through type legalization on 32-bit targets so we would need to special
5099 // case vXi64.
5100 while (getTypeAction(Context, VT) != TypeLegal)
5101 VT = getTypeToTransformTo(Context, VT);
5102
5103 // If vector multiply is legal, assume that's faster than shl + add/sub.
5104 // TODO: Multiply is a complex op with higher latency and lower throughput in
5105 // most implementations, so this check could be loosened based on type
5106 // and/or a CPU attribute.
5107 if (isOperationLegal(ISD::MUL, VT))
5108 return false;
5109
5110 // shl+add, shl+sub, shl+add+neg
5111 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5112 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5113}
5114
/// Returns true if extracting a \p ResVT subvector from \p SrcVT at element
/// \p Index is considered cheap. EXTRACT_SUBVECTOR must be legal or custom
/// for \p ResVT. For non-i1 element types the index must be a multiple of the
/// result's element count; i1 (mask) vectors use the separate test below.
5115bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5116 unsigned Index) const {
5117 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5118 return false;
5119
5120 // Mask vectors support all subregister combinations and operations that
5121 // extract half of vector.
// NOTE(review): as written, the "half" case requires ResVT to be twice as
// wide as SrcVT. For an extract of the upper half one would expect the
// opposite (SrcVT twice as wide as ResVT), so this clause may never be
// satisfied - confirm against callers before relying on it.
5122 if (ResVT.getVectorElementType() == MVT::i1)
5123 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5124 (Index == ResVT.getVectorNumElements()));
5125
5126 return (Index % ResVT.getVectorNumElements()) == 0;
5127}
5128
/// Returns true if the vector operation \p VecOp should be converted to a
/// scalar operation. Target-specific opcodes (>= ISD::BUILTIN_OP_END) are
/// never scalarized. Otherwise scalarize when the vector form is not
/// legal/custom/promote, or when the scalar form of the same opcode is.
5129bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5130 unsigned Opc = VecOp.getOpcode();
5131
5132 // Assume target opcodes can't be scalarized.
5133 // TODO - do we have any exceptions?
5134 if (Opc >= ISD::BUILTIN_OP_END)
5135 return false;
5136
5137 // If the vector op is not supported, try to convert to scalar.
5138 EVT VecVT = VecOp.getValueType();
5139 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5140 return true;
5141
5142 // If the vector op is supported, but the scalar op is not, the transform may
5143 // not be worthwhile.
5144 EVT ScalarVT = VecVT.getScalarType();
5145 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5146}
5147
/// Returns false for vector types. For scalars, returns true when \p VT is a
/// simple type or when \p Opcode is not marked Expand for \p VT.
5148bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
5149 // TODO: Allow vectors?
5150 if (VT.isVector())
5151 return false;
5152 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5153}
5154
/// Returns true exactly when the subtarget reports BMI support.
5155bool X86TargetLowering::isCheapToSpeculateCttz() const {
5156 // Speculate cttz only if we can directly use TZCNT.
5157 return Subtarget.hasBMI();
5158}
5159
/// Returns true exactly when the subtarget reports LZCNT support.
5160bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5161 // Speculate ctlz only if we can directly use LZCNT.
5162 return Subtarget.hasLZCNT();
5163}
5164
/// Decide whether a load of \p LoadVT followed by a bitcast to \p BitcastVT
/// should be turned into a load of \p BitcastVT. Two X86-specific vetoes and
/// one X86-specific approval are applied first; anything else is delegated to
/// TargetLowering::isLoadBitCastBeneficial.
5165bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5166 const SelectionDAG &DAG,
5167 const MachineMemOperand &MMO) const {
// Without AVX512, refuse scalar -> vXi1 conversions.
5168 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5169 BitcastVT.getVectorElementType() == MVT::i1)
5170 return false;
5171
// Without DQI, refuse i8 -> v8i1.
5172 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5173 return false;
5174
5175 // If both types are legal vectors, it's always ok to convert them.
5176 if (LoadVT.isVector() && BitcastVT.isVector() &&
5177 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5178 return true;
5179
5180 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5181}
5182
/// Returns true if stores may be merged into a single store of \p MemVT.
/// With the NoImplicitFloat function attribute the merged size is capped at
/// 64 bits (64-bit targets) or 32 bits; otherwise it is capped at the
/// subtarget's preferred vector width. \p AddressSpace is not consulted.
5183bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5184 const SelectionDAG &DAG) const {
5185 // Do not merge to float value size if the no-implicit-float attribute is
5186 // set. (The original comment said "128 bytes"; presumably 128 bits - confirm.)
5187 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
5188 Attribute::NoImplicitFloat);
5189
5190 if (NoFloat) {
5191 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5192 return (MemVT.getSizeInBits() <= MaxIntSize);
5193 }
5194 // Make sure we don't merge greater than our preferred vector
5195 // width.
5196 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5197 return false;
5198 return true;
5199}
5200
/// Returns the subtarget's hasFastLZCNT() answer unchanged.
5201bool X86TargetLowering::isCtlzFast() const {
5202 return Subtarget.hasFastLZCNT();
5203}
5204
/// Always returns true; \p AndI is not inspected.
5205bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5206 const Instruction &AndI) const {
5207 return true;
5208}
5209
/// Returns true only when \p Y is a non-constant scalar of type i32 or i64
/// and the subtarget has BMI; false for vectors, other widths, constants, or
/// when BMI is missing.
5210bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5211 EVT VT = Y.getValueType();
5212
5213 if (VT.isVector())
5214 return false;
5215
5216 if (!Subtarget.hasBMI())
5217 return false;
5218
5219 // There are only 32-bit and 64-bit forms for 'andn'.
5220 if (VT != MVT::i32 && VT != MVT::i64)
5221 return false;
5222
5223 return !isa<ConstantSDNode>(Y);
5224}
5225
/// Scalar values defer to hasAndNotCompare. For vectors, returns false
/// without SSE1 or for types narrower than 128 bits, true for v4i32, and
/// otherwise true only when SSE2 is available.
5226bool X86TargetLowering::hasAndNot(SDValue Y) const {
5227 EVT VT = Y.getValueType();
5228
5229 if (!VT.isVector())
5230 return hasAndNotCompare(Y);
5231
5232 // Vector.
5233
5234 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5235 return false;
5236
5237 if (VT == MVT::v4i32)
5238 return true;
5239
5240 return Subtarget.hasSSE2();
5241}
5242
/// Returns true when \p X has a scalar integer type; \p Y is not inspected.
5243bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5244 return X.getValueType().isScalarInteger(); // 'bt'
5245}
5246
/// X86 refinement of the base-class hook of the same name. If the base
/// implementation rejects the fold, so does this one. Otherwise the fold is
/// accepted for scalar integers, for splat shift amounts \p Y (undefs
/// allowed), and on AVX2; on other targets it is accepted only when
/// \p NewShiftOpcode is ISD::SHL.
5247bool X86TargetLowering::
5248 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5249 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5250 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5251 SelectionDAG &DAG) const {
5252 // Does baseline recommend not to perform the fold by default?
5253 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5254 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5255 return false;
5256 // For scalars this transform is always beneficial.
5257 if (X.getValueType().isScalarInteger())
5258 return true;
5259 // If all the shift amounts are identical, then transform is beneficial even
5260 // with rudimentary SSE2 shifts.
5261 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5262 return true;
5263 // If we have AVX2 with its powerful shift operations, then it's also good.
5264 if (Subtarget.hasAVX2())
5265 return true;
5266 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5267 return NewShiftOpcode == ISD::SHL;
5268}
5269
/// \p N must be a SHL of an SRL or an SRL of a SHL (asserted). When the
/// subtarget has fast shift masks for the value's kind (vector or scalar),
/// fold only if both shifts use the same amount operand; otherwise defer to
/// TargetLoweringBase::shouldFoldConstantShiftPairToMask.
5270bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5271 const SDNode *N, CombineLevel Level) const {
5272 assert(((N->getOpcode() == ISD::SHL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5276, __PRETTY_FUNCTION__))
5273 N->getOperand(0).getOpcode() == ISD::SRL) ||((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5276, __PRETTY_FUNCTION__))
5274 (N->getOpcode() == ISD::SRL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5276, __PRETTY_FUNCTION__))
5275 N->getOperand(0).getOpcode() == ISD::SHL)) &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5276, __PRETTY_FUNCTION__))
5276 "Expected shift-shift mask")((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5276, __PRETTY_FUNCTION__))
;
5277 EVT VT = N->getValueType(0);
5278 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5279 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5280 // Only fold if the shift values are equal - so it folds to AND.
5281 // TODO - we should fold if either is a non-uniform vector but we don't do
5282 // the fold for non-splats yet.
5283 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5284 }
5285 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5286}
5287
/// Returns false for vector types and for i64 on non-64-bit subtargets;
/// true for every other type of \p Y.
5288bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5289 EVT VT = Y.getValueType();
5290
5291 // For vectors, we don't have a preference, but we probably want a mask.
5292 if (VT.isVector())
5293 return false;
5294
5295 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5296 if (VT == MVT::i64 && !Subtarget.is64Bit())
5297 return false;
5298
5299 return true;
5300}
5301
/// Returns false when the current function has the minsize attribute and the
/// target OS is not Windows; true otherwise. \p N is not inspected.
5302bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5303 SDNode *N) const {
5304 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5305 !Subtarget.isOSWindows())
5306 return false;
5307 return true;
5308}
5309
/// Returns true exactly when \p VT is a legal type for this target.
5310bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5311 // Any legal vector type can be splatted more efficiently than
5312 // loading/spilling from memory.
5313 return isTypeLegal(VT);
5314}
5315
/// Returns the type to use for an equality compare of \p NumBits bits: the
/// integer type of that width when it is legal, v16i8 for 128 bits or v32i8
/// for 256 bits when those vector types are legal, and
/// MVT::INVALID_SIMPLE_VALUE_TYPE when none of these apply.
5316MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5317 MVT VT = MVT::getIntegerVT(NumBits);
5318 if (isTypeLegal(VT))
5319 return VT;
5320
5321 // PMOVMSKB can handle this.
5322 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5323 return MVT::v16i8;
5324
5325 // VPMOVMSKB can handle this.
5326 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5327 return MVT::v32i8;
5328
5329 // TODO: Allow 64-bit type for 32-bit target.
5330 // TODO: 512-bit types should be allowed, but make sure that those
5331 // cases are handled in combineVectorSizedSetCCEquality().
5332
5333 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5334}
5335
5336/// Return true if Val is the undef sentinel value (SM_SentinelUndef) or is
/// equal to CmpVal.
5337static bool isUndefOrEqual(int Val, int CmpVal) {
5338 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5339}
5340
5341/// Return true if Val is either the undef sentinel (SM_SentinelUndef) or the
/// zero sentinel (SM_SentinelZero).
5342static bool isUndefOrZero(int Val) {
5343 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5344}
5345
5346/// Return true if every one of the Size elements of Mask starting at position
5347/// Pos (i.e. Mask.slice(Pos, Size)) is the undef sentinel value.
5348static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5349 return llvm::all_of(Mask.slice(Pos, Size),
5350 [](int M) { return M == SM_SentinelUndef; });
5351}
5352
5353/// Return true if the mask creates a vector whose lower half is undefined,
/// i.e. the first Mask.size() / 2 elements are all the undef sentinel.
5354static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5355 unsigned NumElts = Mask.size();
5356 return isUndefInRange(Mask, 0, NumElts / 2);
5357}
5358
5359/// Return true if the mask creates a vector whose upper half is undefined,
/// i.e. the Mask.size() / 2 elements starting at index Mask.size() / 2 are all
/// the undef sentinel.
5360static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5361 unsigned NumElts = Mask.size();
5362 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5363}
5364
5365/// Return true if Val falls within the half-open range [Low, Hi): Low is
/// included, Hi is excluded.
5366static bool isInRange(int Val, int Low, int Hi) {
5367 return (Val >= Low && Val < Hi);
5368}
5369
5370/// Return true if the value of any element in Mask falls within the
5371/// half-open range [Low, Hi).
5372static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5373 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5374}
5375
5376/// Return true if Val is undef or if its value falls within the
5377/// half-open range [Low, Hi).
5378static bool isUndefOrInRange(int Val, int Low, int Hi) {
5379 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5380}
5381
5382/// Return true if every element in Mask is undef or if its value
5383/// falls within the half-open range [Low, Hi).
5384static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5385 return llvm::all_of(
5386 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5387}
5388
5389/// Return true if Val is undef, zero or if its value falls within the
5390/// specified range (L, H].
5391static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5392 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5393}
5394
5395/// Return true if every element in Mask is undef, zero or if its value
5396/// falls within the specified range (L, H].
5397static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5398 return llvm::all_of(
5399 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5400}
5401
5402/// Return true if every element in Mask, beginning
5403/// from position Pos and ending in Pos + Size, falls within the specified
5404/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5405static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5406 unsigned Size, int Low, int Step = 1) {
5407 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5408 if (!isUndefOrEqual(Mask[i], Low))
5409 return false;
5410 return true;
5411}
5412
5413/// Return true if every element in Mask, beginning
5414/// from position Pos and ending in Pos+Size, falls within the specified
5415/// sequential range (Low, Low+Size], or is undef or is zero.
5416static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5417 unsigned Size, int Low,
5418 int Step = 1) {
5419 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5420 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5421 return false;
5422 return true;
5423}
5424
5425/// Return true if every element in Mask, beginning
5426/// from position Pos and ending in Pos+Size is undef or is zero.
5427static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5428 unsigned Size) {
5429 return llvm::all_of(Mask.slice(Pos, Size),
5430 [](int M) { return isUndefOrZero(M); });
5431}
5432
5433/// Helper function to test whether a shuffle mask could be
5434/// simplified by widening the elements being shuffled.
5435///
5436/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5437/// leaves it in an unspecified state.
5438///
5439/// NOTE: This must handle normal vector shuffle masks and *target* vector
5440/// shuffle masks. The latter have the special property of a '-2' representing
5441/// a zero-ed lane of a vector.
5442static bool canWidenShuffleElements(ArrayRef<int> Mask,
5443 SmallVectorImpl<int> &WidenedMask) {
5444 WidenedMask.assign(Mask.size() / 2, 0);
5445 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5446 int M0 = Mask[i];
5447 int M1 = Mask[i + 1];
5448
5449 // If both elements are undef, its trivial.
5450 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5451 WidenedMask[i / 2] = SM_SentinelUndef;
5452 continue;
5453 }
5454
5455 // Check for an undef mask and a mask value properly aligned to fit with
5456 // a pair of values. If we find such a case, use the non-undef mask's value.
5457 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5458 WidenedMask[i / 2] = M1 / 2;
5459 continue;
5460 }
5461 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5462 WidenedMask[i / 2] = M0 / 2;
5463 continue;
5464 }
5465
5466 // When zeroing, we need to spread the zeroing across both lanes to widen.
5467 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5468 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5469 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5470 WidenedMask[i / 2] = SM_SentinelZero;
5471 continue;
5472 }
5473 return false;
5474 }
5475
5476 // Finally check if the two mask values are adjacent and aligned with
5477 // a pair.
5478 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5479 WidenedMask[i / 2] = M0 / 2;
5480 continue;
5481 }
5482
5483 // Otherwise we can't safely widen the elements used in this shuffle.
5484 return false;
5485 }
5486 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5487, __PRETTY_FUNCTION__))
5487 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5487, __PRETTY_FUNCTION__))
;
5488
5489 return true;
5490}
5491
5492static bool canWidenShuffleElements(ArrayRef<int> Mask,
5493 const APInt &Zeroable,
5494 bool V2IsZero,
5495 SmallVectorImpl<int> &WidenedMask) {
5496 // Create an alternative mask with info about zeroable elements.
5497 // Here we do not set undef elements as zeroable.
5498 SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
5499 if (V2IsZero) {
5500 assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!")((!Zeroable.isNullValue() && "V2's non-undef elements are used?!"
) ? static_cast<void> (0) : __assert_fail ("!Zeroable.isNullValue() && \"V2's non-undef elements are used?!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5500, __PRETTY_FUNCTION__))
;
5501 for (int i = 0, Size = Mask.size(); i != Size; ++i)
5502 if (Mask[i] != SM_SentinelUndef && Zeroable[i])
5503 ZeroableMask[i] = SM_SentinelZero;
5504 }
5505 return canWidenShuffleElements(ZeroableMask, WidenedMask);
5506}
5507
5508static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5509 SmallVector<int, 32> WidenedMask;
5510 return canWidenShuffleElements(Mask, WidenedMask);
5511}
5512
5513/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5514bool X86::isZeroNode(SDValue Elt) {
5515 return isNullConstant(Elt) || isNullFPConstant(Elt);
5516}
5517
5518// Build a vector of constants.
5519// Use an UNDEF node if MaskElt == -1.
5520// Split 64-bit constants in the 32-bit mode.
5521static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5522 const SDLoc &dl, bool IsMask = false) {
5523
5524 SmallVector<SDValue, 32> Ops;
5525 bool Split = false;
5526
5527 MVT ConstVecVT = VT;
5528 unsigned NumElts = VT.getVectorNumElements();
5529 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5530 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5531 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5532 Split = true;
5533 }
5534
5535 MVT EltVT = ConstVecVT.getVectorElementType();
5536 for (unsigned i = 0; i < NumElts; ++i) {
5537 bool IsUndef = Values[i] < 0 && IsMask;
5538 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5539 DAG.getConstant(Values[i], dl, EltVT);
5540 Ops.push_back(OpNode);
5541 if (Split)
5542 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5543 DAG.getConstant(0, dl, EltVT));
5544 }
5545 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5546 if (Split)
5547 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5548 return ConstsNode;
5549}
5550
5551static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5552 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5553 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5554, __PRETTY_FUNCTION__))
5554 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5554, __PRETTY_FUNCTION__))
;
5555 SmallVector<SDValue, 32> Ops;
5556 bool Split = false;
5557
5558 MVT ConstVecVT = VT;
5559 unsigned NumElts = VT.getVectorNumElements();
5560 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5561 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5562 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5563 Split = true;
5564 }
5565
5566 MVT EltVT = ConstVecVT.getVectorElementType();
5567 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5568 if (Undefs[i]) {
5569 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5570 continue;
5571 }
5572 const APInt &V = Bits[i];
5573 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5573, __PRETTY_FUNCTION__))
;
5574 if (Split) {
5575 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5576 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5577 } else if (EltVT == MVT::f32) {
5578 APFloat FV(APFloat::IEEEsingle(), V);
5579 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5580 } else if (EltVT == MVT::f64) {
5581 APFloat FV(APFloat::IEEEdouble(), V);
5582 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5583 } else {
5584 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5585 }
5586 }
5587
5588 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5589 return DAG.getBitcast(VT, ConstsNode);
5590}
5591
5592/// Returns a vector of specified type with all zero elements.
5593static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5594 SelectionDAG &DAG, const SDLoc &dl) {
5595 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5597, __PRETTY_FUNCTION__))
5596 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5597, __PRETTY_FUNCTION__))
5597 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5597, __PRETTY_FUNCTION__))
;
5598
5599 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5600 // type. This ensures they get CSE'd. But if the integer type is not
5601 // available, use a floating-point +0.0 instead.
5602 SDValue Vec;
5603 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5604 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5605 } else if (VT.isFloatingPoint()) {
5606 Vec = DAG.getConstantFP(+0.0, dl, VT);
5607 } else if (VT.getVectorElementType() == MVT::i1) {
5608 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5609, __PRETTY_FUNCTION__))
5609 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5609, __PRETTY_FUNCTION__))
;
5610 Vec = DAG.getConstant(0, dl, VT);
5611 } else {
5612 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5613 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5614 }
5615 return DAG.getBitcast(VT, Vec);
5616}
5617
5618static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5619 const SDLoc &dl, unsigned vectorWidth) {
5620 EVT VT = Vec.getValueType();
5621 EVT ElVT = VT.getVectorElementType();
5622 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5623 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5624 VT.getVectorNumElements()/Factor);
5625
5626 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5627 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5628 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5628, __PRETTY_FUNCTION__))
;
5629
5630 // This is the index of the first element of the vectorWidth-bit chunk
5631 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5632 IdxVal &= ~(ElemsPerChunk - 1);
5633
5634 // If the input is a buildvector just emit a smaller one.
5635 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5636 return DAG.getBuildVector(ResultVT, dl,
5637 Vec->ops().slice(IdxVal, ElemsPerChunk));
5638
5639 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5640 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5641}
5642
5643/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5644/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5645/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5646/// instructions or a simple subregister reference. Idx is an index in the
5647/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5648/// lowering EXTRACT_VECTOR_ELT operations easier.
5649static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5650 SelectionDAG &DAG, const SDLoc &dl) {
5651 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5652, __PRETTY_FUNCTION__))
5652 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5652, __PRETTY_FUNCTION__))
;
5653 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5654}
5655
5656/// Generate a DAG to grab 256-bits from a 512-bit vector.
5657static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5658 SelectionDAG &DAG, const SDLoc &dl) {
5659 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5659, __PRETTY_FUNCTION__))
;
5660 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5661}
5662
5663static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5664 SelectionDAG &DAG, const SDLoc &dl,
5665 unsigned vectorWidth) {
5666 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5667, __PRETTY_FUNCTION__))
5667 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5667, __PRETTY_FUNCTION__))
;
5668 // Inserting UNDEF is Result
5669 if (Vec.isUndef())
5670 return Result;
5671 EVT VT = Vec.getValueType();
5672 EVT ElVT = VT.getVectorElementType();
5673 EVT ResultVT = Result.getValueType();
5674
5675 // Insert the relevant vectorWidth bits.
5676 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5677 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5677, __PRETTY_FUNCTION__))
;
5678
5679 // This is the index of the first element of the vectorWidth-bit chunk
5680 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5681 IdxVal &= ~(ElemsPerChunk - 1);
5682
5683 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5684 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5685}
5686
5687/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5688/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5689/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5690/// simple superregister reference. Idx is an index in the 128 bits
5691/// we want. It need not be aligned to a 128-bit boundary. That makes
5692/// lowering INSERT_VECTOR_ELT operations easier.
5693static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5694 SelectionDAG &DAG, const SDLoc &dl) {
5695 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5695, __PRETTY_FUNCTION__))
;
5696 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5697}
5698
5699/// Widen a vector to a larger size with the same scalar type, with the new
5700/// elements either zero or undef.
5701static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5702 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5703 const SDLoc &dl) {
5704 assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5706, __PRETTY_FUNCTION__))
5705 Vec.getValueType().getScalarType() == VT.getScalarType() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5706, __PRETTY_FUNCTION__))
5706 "Unsupported vector widening type")((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5706, __PRETTY_FUNCTION__))
;
5707 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5708 : DAG.getUNDEF(VT);
5709 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5710 DAG.getIntPtrConstant(0, dl));
5711}
5712
5713/// Widen a vector to a larger size with the same scalar type, with the new
5714/// elements either zero or undef.
5715static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
5716 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5717 const SDLoc &dl, unsigned WideSizeInBits) {
5718 assert(Vec.getValueSizeInBits() < WideSizeInBits &&((Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits
% Vec.getScalarValueSizeInBits()) == 0 && "Unsupported vector widening type"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5720, __PRETTY_FUNCTION__))
5719 (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&((Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits
% Vec.getScalarValueSizeInBits()) == 0 && "Unsupported vector widening type"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5720, __PRETTY_FUNCTION__))
5720 "Unsupported vector widening type")((Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits
% Vec.getScalarValueSizeInBits()) == 0 && "Unsupported vector widening type"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueSizeInBits() < WideSizeInBits && (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5720, __PRETTY_FUNCTION__))
;
5721 unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits();
5722 MVT SVT = Vec.getSimpleValueType().getScalarType();
5723 MVT VT = MVT::getVectorVT(SVT, WideNumElts);
5724 return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
5725}
5726
5727// Helper function to collect subvector ops that are concated together,
5728// either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series.
5729// The subvectors in Ops are guaranteed to be the same type.
5730static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
5731 assert(Ops.empty() && "Expected an empty ops vector")((Ops.empty() && "Expected an empty ops vector") ? static_cast
<void> (0) : __assert_fail ("Ops.empty() && \"Expected an empty ops vector\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5731, __PRETTY_FUNCTION__))
;
5732
5733 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
5734 Ops.append(N->op_begin(), N->op_end());
5735 return true;
5736 }
5737
5738 if (N->getOpcode() == ISD::INSERT_SUBVECTOR &&
5739 isa<ConstantSDNode>(N->getOperand(2))) {
5740 SDValue Src = N->getOperand(0);
5741 SDValue Sub = N->getOperand(1);
5742 const APInt &Idx = N->getConstantOperandAPInt(2);
5743 EVT VT = Src.getValueType();
5744 EVT SubVT = Sub.getValueType();
5745
5746 // TODO - Handle more general insert_subvector chains.
5747 if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
5748 Idx == (VT.getVectorNumElements() / 2) &&
5749 Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
5750 Src.getOperand(1).getValueType() == SubVT &&
5751 isNullConstant(Src.getOperand(2))) {
5752 Ops.push_back(Src.getOperand(1));
5753 Ops.push_back(Sub);
5754 return true;
5755 }
5756 }
5757
5758 return false;
5759}
5760
5761// Helper for splitting operands of an operation to legal target size and
5762// apply a function on each part.
5763// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5764// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
5765// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5766// The argument Builder is a function that will be applied on each split part:
5767// SDValue Builder(SelectionDAG&G, SDLoc, ArrayRef<SDValue>)
5768template <typename F>
5769SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5770 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
5771 F Builder, bool CheckBWI = true) {
5772 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2")((Subtarget.hasSSE2() && "Target assumed to support at least SSE2"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasSSE2() && \"Target assumed to support at least SSE2\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5772, __PRETTY_FUNCTION__))
;
5773 unsigned NumSubs = 1;
5774 if ((CheckBWI && Subtarget.useBWIRegs()) ||
5775 (!CheckBWI && Subtarget.useAVX512Regs())) {
5776 if (VT.getSizeInBits() > 512) {
5777 NumSubs = VT.getSizeInBits() / 512;
5778 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 512) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 512) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5778, __PRETTY_FUNCTION__))
;
5779 }
5780 } else if (Subtarget.hasAVX2()) {
5781 if (VT.getSizeInBits() > 256) {
5782 NumSubs = VT.getSizeInBits() / 256;
5783 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 256) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 256) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5783, __PRETTY_FUNCTION__))
;
5784 }
5785 } else {
5786 if (VT.getSizeInBits() > 128) {
5787 NumSubs = VT.getSizeInBits() / 128;
5788 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 128) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 128) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5788, __PRETTY_FUNCTION__))
;
5789 }
5790 }
5791
5792 if (NumSubs == 1)
5793 return Builder(DAG, DL, Ops);
5794
5795 SmallVector<SDValue, 4> Subs;
5796 for (unsigned i = 0; i != NumSubs; ++i) {
5797 SmallVector<SDValue, 2> SubOps;
5798 for (SDValue Op : Ops) {
5799 EVT OpVT = Op.getValueType();
5800 unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
5801 unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
5802 SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
5803 }
5804 Subs.push_back(Builder(DAG, DL, SubOps));
5805 }
5806 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5807}
5808
5809/// Insert i1-subvector to i1-vector.
5810static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5811 const X86Subtarget &Subtarget) {
5812
5813 SDLoc dl(Op);
5814 SDValue Vec = Op.getOperand(0);
5815 SDValue SubVec = Op.getOperand(1);
5816 SDValue Idx = Op.getOperand(2);
5817
5818 if (!isa<ConstantSDNode>(Idx))
5819 return SDValue();
5820
5821 // Inserting undef is a nop. We can just return the original vector.
5822 if (SubVec.isUndef())
5823 return Vec;
5824
5825 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5826 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5827 return Op;
5828
5829 MVT OpVT = Op.getSimpleValueType();
5830 unsigned NumElems = OpVT.getVectorNumElements();
5831
5832 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5833
5834 // Extend to natively supported kshift.
5835 MVT WideOpVT = OpVT;
5836 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5837 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5838
5839 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5840 // if necessary.
5841 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5842 // May need to promote to a legal type.
5843 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5844 DAG.getConstant(0, dl, WideOpVT),
5845 SubVec, Idx);
5846 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5847 }
5848
5849 MVT SubVecVT = SubVec.getSimpleValueType();
5850 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5851
5852 assert(IdxVal + SubVecNumElems <= NumElems &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5854, __PRETTY_FUNCTION__))
5853 IdxVal % SubVecVT.getSizeInBits() == 0 &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5854, __PRETTY_FUNCTION__))
5854 "Unexpected index value in INSERT_SUBVECTOR")((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-10~++20200110111110+a1cc19b5814/llvm/lib/Target/X86/X86ISelLowering.cpp"
, 5854, __PRETTY_FUNCTION__))
;
5855
5856 SDValue Undef = DAG.getUNDEF(WideOpVT);
5857
5858 if (IdxVal == 0) {
5859 // Zero lower bits of the Vec
5860 SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
5861 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
5862 ZeroIdx);
5863 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5864 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5865 // Merge them together, SubVec should be zero extended.
5866 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5867 DAG.getConstant(0, dl, WideOpVT),
5868 SubVec, ZeroIdx);
5869 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5870 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5871