Bug Summary

File: llvm/lib/Target/X86/X86ISelLowering.cpp
Warning: line 36169, column 5
Division by zero
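
The division-by-zero check reports execution paths on which the divisor of the marked expression can be zero when the division executes. A minimal, hypothetical C++ sketch of this defect class and its usual guard follows (generic names; this is not the flagged code at line 36169):

    // Hypothetical illustration only, not the code at line 36169: if Denom can
    // be zero on some feasible path, the division must be guarded first.
    static unsigned safeRatio(unsigned Numer, unsigned Denom) {
      if (Denom == 0)       // guard; without it the zero path reaches the divide
        return 0;
      return Numer / Denom; // Denom is known non-zero here
    }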

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-11-29-190409-37574-1 -x c++ /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/llvm/lib/Target/X86/X86ISelLowering.cpp

/build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/llvm/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/IntrinsicLowering.h"
34#include "llvm/CodeGen/MachineFrameInfo.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineJumpTableInfo.h"
38#include "llvm/CodeGen/MachineModuleInfo.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetLowering.h"
41#include "llvm/CodeGen/WinEHFuncInfo.h"
42#include "llvm/IR/CallingConv.h"
43#include "llvm/IR/Constants.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/DiagnosticInfo.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/GlobalAlias.h"
48#include "llvm/IR/GlobalVariable.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Intrinsics.h"
51#include "llvm/MC/MCAsmInfo.h"
52#include "llvm/MC/MCContext.h"
53#include "llvm/MC/MCExpr.h"
54#include "llvm/MC/MCSymbol.h"
55#include "llvm/Support/CommandLine.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/ErrorHandling.h"
58#include "llvm/Support/KnownBits.h"
59#include "llvm/Support/MathExtras.h"
60#include "llvm/Target/TargetOptions.h"
61#include <algorithm>
62#include <bitset>
63#include <cctype>
64#include <numeric>
65using namespace llvm;
66
67#define DEBUG_TYPE "x86-isel"
68
69STATISTIC(NumTailCalls, "Number of tail calls");
70
71static cl::opt<int> ExperimentalPrefLoopAlignment(
72 "x86-experimental-pref-loop-alignment", cl::init(4),
73 cl::desc(
74 "Sets the preferable loop alignment for experiments (as log2 bytes)"
75 "(the last x86-experimental-pref-loop-alignment bits"
76 " of the loop header PC will be 0)."),
77 cl::Hidden);
78
79static cl::opt<bool> MulConstantOptimization(
80 "mul-constant-optimization", cl::init(true),
81 cl::desc("Replace 'mul x, Const' with more effective instructions like "
82 "SHIFT, LEA, etc."),
83 cl::Hidden);
84
85static cl::opt<bool> ExperimentalUnorderedISEL(
86 "x86-experimental-unordered-atomic-isel", cl::init(false),
87 cl::desc("Use LoadSDNode and StoreSDNode instead of "
88 "AtomicSDNode for unordered atomic loads and "
89 "stores respectively."),
90 cl::Hidden);
91
92/// Call this when the user attempts to do something unsupported, like
93/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
94/// report_fatal_error, so calling code should attempt to recover without
95/// crashing.
96static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
97 const char *Msg) {
98 MachineFunction &MF = DAG.getMachineFunction();
99 DAG.getContext()->diagnose(
100 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
101}
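
As a hedged illustration of the recovery contract described in the comment above, a caller would typically diagnose the problem and then return a placeholder value rather than abort. This is a hypothetical sketch, not a routine from this file:

    // Hypothetical caller (not part of X86ISelLowering.cpp): report the
    // unsupported construct, then hand back an UNDEF of the right type so
    // lowering can continue without crashing.
    static SDValue lowerUnsupportedOp(SDValue Op, SelectionDAG &DAG) {
      SDLoc dl(Op);
      errorUnsupported(DAG, dl, "operation requires SSE2");
      return DAG.getUNDEF(Op.getValueType());
    }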
102
103X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
104 const X86Subtarget &STI)
105 : TargetLowering(TM), Subtarget(STI) {
106 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
107 X86ScalarSSEf64 = Subtarget.hasSSE2();
108 X86ScalarSSEf32 = Subtarget.hasSSE1();
109 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
110
111 // Set up the TargetLowering object.
112
113 // X86 is weird. It always uses i8 for shift amounts and setcc results.
114 setBooleanContents(ZeroOrOneBooleanContent);
115 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
116 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
117
118 // For 64-bit, since we have so many registers, use the ILP scheduler.
119 // For 32-bit, use the register pressure specific scheduling.
120 // For Atom, always use ILP scheduling.
121 if (Subtarget.isAtom())
122 setSchedulingPreference(Sched::ILP);
123 else if (Subtarget.is64Bit())
124 setSchedulingPreference(Sched::ILP);
125 else
126 setSchedulingPreference(Sched::RegPressure);
127 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
128 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
129
130 // Bypass expensive divides and use cheaper ones.
131 if (TM.getOptLevel() >= CodeGenOpt::Default) {
132 if (Subtarget.hasSlowDivide32())
133 addBypassSlowDiv(32, 8);
134 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
135 addBypassSlowDiv(64, 32);
136 }
137
138 if (Subtarget.isTargetWindowsMSVC() ||
139 Subtarget.isTargetWindowsItanium()) {
140 // Setup Windows compiler runtime calls.
141 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
142 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
143 setLibcallName(RTLIB::SREM_I64, "_allrem");
144 setLibcallName(RTLIB::UREM_I64, "_aullrem");
145 setLibcallName(RTLIB::MUL_I64, "_allmul");
146 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
147 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
148 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
149 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
150 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
151 }
152
153 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
154 // MSVCRT doesn't have powi; fall back to pow
155 setLibcallName(RTLIB::POWI_F32, nullptr);
156 setLibcallName(RTLIB::POWI_F64, nullptr);
157 }
158
159 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
160 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
161 // FIXME: Should we be limiting the atomic size on other configs? Default is
162 // 1024.
163 if (!Subtarget.hasCmpxchg8b())
164 setMaxAtomicSizeInBitsSupported(32);
165
166 // Set up the register classes.
167 addRegisterClass(MVT::i8, &X86::GR8RegClass);
168 addRegisterClass(MVT::i16, &X86::GR16RegClass);
169 addRegisterClass(MVT::i32, &X86::GR32RegClass);
170 if (Subtarget.is64Bit())
171 addRegisterClass(MVT::i64, &X86::GR64RegClass);
172
173 for (MVT VT : MVT::integer_valuetypes())
174 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
175
176 // We don't accept any truncstore of integer registers.
177 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
178 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
179 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
180 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
181 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
182 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
183
184 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
185
186 // SETOEQ and SETUNE require checking two conditions.
187 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
188 setCondCodeAction(ISD::SETOEQ, VT, Expand);
189 setCondCodeAction(ISD::SETUNE, VT, Expand);
190 }
191
192 // Integer absolute.
193 if (Subtarget.hasCMov()) {
194 setOperationAction(ISD::ABS , MVT::i16 , Custom);
195 setOperationAction(ISD::ABS , MVT::i32 , Custom);
196 if (Subtarget.is64Bit())
197 setOperationAction(ISD::ABS , MVT::i64 , Custom);
198 }
199
200 // Funnel shifts.
201 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
202 // For slow shld targets we only lower for code size.
203 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
204
205 setOperationAction(ShiftOp , MVT::i8 , Custom);
206 setOperationAction(ShiftOp , MVT::i16 , Custom);
207 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
208 if (Subtarget.is64Bit())
209 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
210 }
211
212 if (!Subtarget.useSoftFloat()) {
213 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
214 // operation.
215 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
216 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
217 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
218 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
219 // We have an algorithm for SSE2, and we turn this into a 64-bit
220 // FILD or VCVTUSI2SS/SD for other targets.
221 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
222 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
223 // We have an algorithm for SSE2->double, and we turn this into a
224 // 64-bit FILD followed by conditional FADD for other targets.
225 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
226 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
227
228 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
229 // this operation.
230 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
231 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
232 // SSE has no i16 to fp conversion, only i32. We promote in the handler
233 // to allow f80 to use i16 and f64 to use i16 with sse1 only
234 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
235 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
236 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
237 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
238 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
239 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
240 // are Legal, f80 is custom lowered.
241 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
242 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
243
244 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
245 // this operation.
246 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
247 // FIXME: This doesn't generate invalid exception when it should. PR44019.
248 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
249 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
250 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
251 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
252 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
253 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
254 // are Legal, f80 is custom lowered.
255 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
256 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
257
258 // Handle FP_TO_UINT by promoting the destination to a larger signed
259 // conversion.
260 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
261 // FIXME: This doesn't generate invalid exception when it should. PR44019.
262 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
263 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
264 // FIXME: This doesn't generate invalid exception when it should. PR44019.
265 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
266 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
267 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
268 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
269 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
270
271 setOperationAction(ISD::LRINT, MVT::f32, Custom);
272 setOperationAction(ISD::LRINT, MVT::f64, Custom);
273 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
274 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
275
276 if (!Subtarget.is64Bit()) {
277 setOperationAction(ISD::LRINT, MVT::i64, Custom);
278 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
279 }
280 }
281
282 // Handle address space casts between mixed sized pointers.
283 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
284 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
285
286 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
287 if (!X86ScalarSSEf64) {
288 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
289 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
290 if (Subtarget.is64Bit()) {
291 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
292 // Without SSE, i64->f64 goes through memory.
293 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
294 }
295 } else if (!Subtarget.is64Bit())
296 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
297
298 // Scalar integer divide and remainder are lowered to use operations that
299 // produce two results, to match the available instructions. This exposes
300 // the two-result form to trivial CSE, which is able to combine x/y and x%y
301 // into a single instruction.
302 //
303 // Scalar integer multiply-high is also lowered to use two-result
304 // operations, to match the available instructions. However, plain multiply
305 // (low) operations are left as Legal, as there are single-result
306 // instructions for this in x86. Using the two-result multiply instructions
307 // when both high and low results are needed must be arranged by dagcombine.
308 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
309 setOperationAction(ISD::MULHS, VT, Expand);
310 setOperationAction(ISD::MULHU, VT, Expand);
311 setOperationAction(ISD::SDIV, VT, Expand);
312 setOperationAction(ISD::UDIV, VT, Expand);
313 setOperationAction(ISD::SREM, VT, Expand);
314 setOperationAction(ISD::UREM, VT, Expand);
315 }
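
For context, here is a minimal hypothetical C++ example of the source pattern the comment above describes: computing both the quotient and the remainder of the same operands, which the two-result lowering lets CSE fold into a single hardware divide.

    // Hypothetical example (not from this file): one x86 idiv produces both
    // results, so expanding SDIV/SREM to the two-result form lets x/y and x%y
    // share a single instruction.
    static void quotRem(int Num, int Den, int &Quot, int &Rem) {
      Quot = Num / Den; // quotient half of the combined divide
      Rem = Num % Den;  // remainder half, CSE'd with the divide above
    }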
316
317 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
318 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
319 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
320 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
321 setOperationAction(ISD::BR_CC, VT, Expand);
322 setOperationAction(ISD::SELECT_CC, VT, Expand);
323 }
324 if (Subtarget.is64Bit())
325 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
326 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
327 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
328 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
329
330 setOperationAction(ISD::FREM , MVT::f32 , Expand);
331 setOperationAction(ISD::FREM , MVT::f64 , Expand);
332 setOperationAction(ISD::FREM , MVT::f80 , Expand);
333 setOperationAction(ISD::FREM , MVT::f128 , Expand);
334 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
335
336 // Promote the i8 variants and force them on up to i32 which has a shorter
337 // encoding.
338 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
339 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
340 if (!Subtarget.hasBMI()) {
341 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
342 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
343 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
344 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
345 if (Subtarget.is64Bit()) {
346 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
347 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
348 }
349 }
350
351 if (Subtarget.hasLZCNT()) {
352 // When promoting the i8 variants, force them to i32 for a shorter
353 // encoding.
354 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
355 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
356 } else {
357 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
358 if (VT == MVT::i64 && !Subtarget.is64Bit())
359 continue;
360 setOperationAction(ISD::CTLZ , VT, Custom);
361 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
362 }
363 }
364
365 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
366 ISD::STRICT_FP_TO_FP16}) {
367 // Special handling for half-precision floating point conversions.
368 // If we don't have F16C support, then lower half float conversions
369 // into library calls.
370 setOperationAction(
371 Op, MVT::f32,
372 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
373 // There's never any support for operations beyond MVT::f32.
374 setOperationAction(Op, MVT::f64, Expand);
375 setOperationAction(Op, MVT::f80, Expand);
376 setOperationAction(Op, MVT::f128, Expand);
377 }
378
379 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
380 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
381 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
382 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
383 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
384 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
385 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
386 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
387
388 setOperationAction(ISD::PARITY, MVT::i8, Custom);
389 if (Subtarget.hasPOPCNT()) {
390 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
391 } else {
392 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
393 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
394 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
395 if (Subtarget.is64Bit())
396 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
397 else
398 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
399
400 setOperationAction(ISD::PARITY, MVT::i16, Custom);
401 setOperationAction(ISD::PARITY, MVT::i32, Custom);
402 if (Subtarget.is64Bit())
403 setOperationAction(ISD::PARITY, MVT::i64, Custom);
404 }
405
406 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
407
408 if (!Subtarget.hasMOVBE())
409 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
410
411 // X86 wants to expand cmov itself.
412 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
413 setOperationAction(ISD::SELECT, VT, Custom);
414 setOperationAction(ISD::SETCC, VT, Custom);
415 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
416 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
417 }
418 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
419 if (VT == MVT::i64 && !Subtarget.is64Bit())
420 continue;
421 setOperationAction(ISD::SELECT, VT, Custom);
422 setOperationAction(ISD::SETCC, VT, Custom);
423 }
424
425 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
426 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
427 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
428
429 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
430 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
431 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
432 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
433 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
434 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
435 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
436 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
437
438 // Darwin ABI issue.
439 for (auto VT : { MVT::i32, MVT::i64 }) {
440 if (VT == MVT::i64 && !Subtarget.is64Bit())
441 continue;
442 setOperationAction(ISD::ConstantPool , VT, Custom);
443 setOperationAction(ISD::JumpTable , VT, Custom);
444 setOperationAction(ISD::GlobalAddress , VT, Custom);
445 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
446 setOperationAction(ISD::ExternalSymbol , VT, Custom);
447 setOperationAction(ISD::BlockAddress , VT, Custom);
448 }
449
450 // 64-bit shl, sra, srl (iff 32-bit x86)
451 for (auto VT : { MVT::i32, MVT::i64 }) {
452 if (VT == MVT::i64 && !Subtarget.is64Bit())
453 continue;
454 setOperationAction(ISD::SHL_PARTS, VT, Custom);
455 setOperationAction(ISD::SRA_PARTS, VT, Custom);
456 setOperationAction(ISD::SRL_PARTS, VT, Custom);
457 }
458
459 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
460 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
461
462 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
463
464 // Expand certain atomics
465 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
466 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
467 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
468 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
469 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
470 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
471 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
472 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
473 }
474
475 if (!Subtarget.is64Bit())
476 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
477
478 if (Subtarget.hasCmpxchg16b()) {
479 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
480 }
481
482 // FIXME - use subtarget debug flags
483 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
484 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
485 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
486 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
487 }
488
489 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
490 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
491
492 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
493 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
494
495 setOperationAction(ISD::TRAP, MVT::Other, Legal);
496 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
497
498 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
499 setOperationAction(ISD::VASTART , MVT::Other, Custom);
500 setOperationAction(ISD::VAEND , MVT::Other, Expand);
501 bool Is64Bit = Subtarget.is64Bit();
502 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
503 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
504
505 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
506 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
507
508 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
509
510 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
511 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
512 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
513
514 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
515 // f32 and f64 use SSE.
516 // Set up the FP register classes.
517 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
518 : &X86::FR32RegClass);
519 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
520 : &X86::FR64RegClass);
521
522 // Disable f32->f64 extload as we can only generate this in one instruction
523 // under optsize. So it's easier to pattern match (fpext (load)) for that
524 // case instead of needing to emit 2 instructions for extload in the
525 // non-optsize case.
526 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
527
528 for (auto VT : { MVT::f32, MVT::f64 }) {
529 // Use ANDPD to simulate FABS.
530 setOperationAction(ISD::FABS, VT, Custom);
531
532 // Use XORP to simulate FNEG.
533 setOperationAction(ISD::FNEG, VT, Custom);
534
535 // Use ANDPD and ORPD to simulate FCOPYSIGN.
536 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
537
538 // These might be better off as horizontal vector ops.
539 setOperationAction(ISD::FADD, VT, Custom);
540 setOperationAction(ISD::FSUB, VT, Custom);
541
542 // We don't support sin/cos/fmod
543 setOperationAction(ISD::FSIN , VT, Expand);
544 setOperationAction(ISD::FCOS , VT, Expand);
545 setOperationAction(ISD::FSINCOS, VT, Expand);
546 }
547
548 // Lower this to MOVMSK plus an AND.
549 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
550 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
551
552 } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
553 (UseX87 || Is64Bit)) {
554 // Use SSE for f32, x87 for f64.
555 // Set up the FP register classes.
556 addRegisterClass(MVT::f32, &X86::FR32RegClass);
557 if (UseX87)
558 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
559
560 // Use ANDPS to simulate FABS.
561 setOperationAction(ISD::FABS , MVT::f32, Custom);
562
563 // Use XORP to simulate FNEG.
564 setOperationAction(ISD::FNEG , MVT::f32, Custom);
565
566 if (UseX87)
567 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
568
569 // Use ANDPS and ORPS to simulate FCOPYSIGN.
570 if (UseX87)
571 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
572 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
573
574 // We don't support sin/cos/fmod
575 setOperationAction(ISD::FSIN , MVT::f32, Expand);
576 setOperationAction(ISD::FCOS , MVT::f32, Expand);
577 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
578
579 if (UseX87) {
580 // Always expand sin/cos functions even though x87 has an instruction.
581 setOperationAction(ISD::FSIN, MVT::f64, Expand);
582 setOperationAction(ISD::FCOS, MVT::f64, Expand);
583 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
584 }
585 } else if (UseX87) {
586 // f32 and f64 in x87.
587 // Set up the FP register classes.
588 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
589 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
590
591 for (auto VT : { MVT::f32, MVT::f64 }) {
592 setOperationAction(ISD::UNDEF, VT, Expand);
593 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
594
595 // Always expand sin/cos functions even though x87 has an instruction.
596 setOperationAction(ISD::FSIN , VT, Expand);
597 setOperationAction(ISD::FCOS , VT, Expand);
598 setOperationAction(ISD::FSINCOS, VT, Expand);
599 }
600 }
601
602 // Expand FP32 immediates into loads from the stack, save special cases.
603 if (isTypeLegal(MVT::f32)) {
604 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
605 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
606 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
607 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
608 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
609 } else // SSE immediates.
610 addLegalFPImmediate(APFloat(+0.0f)); // xorps
611 }
612 // Expand FP64 immediates into loads from the stack, save special cases.
613 if (isTypeLegal(MVT::f64)) {
614 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
615 addLegalFPImmediate(APFloat(+0.0)); // FLD0
616 addLegalFPImmediate(APFloat(+1.0)); // FLD1
617 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
618 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
619 } else // SSE immediates.
620 addLegalFPImmediate(APFloat(+0.0)); // xorpd
621 }
622 // Handle constrained floating-point operations of scalar.
623 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
624 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
625 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
626 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
627 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
628 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
629 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
630 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
631 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
632 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
633 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
634 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
635 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
636
637 // We don't support FMA.
638 setOperationAction(ISD::FMA, MVT::f64, Expand);
639 setOperationAction(ISD::FMA, MVT::f32, Expand);
640
641 // f80 always uses X87.
642 if (UseX87) {
643 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
644 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
645 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
646 {
647 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
648 addLegalFPImmediate(TmpFlt); // FLD0
649 TmpFlt.changeSign();
650 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
651
652 bool ignored;
653 APFloat TmpFlt2(+1.0);
654 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
655 &ignored);
656 addLegalFPImmediate(TmpFlt2); // FLD1
657 TmpFlt2.changeSign();
658 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
659 }
660
661 // Always expand sin/cos functions even though x87 has an instruction.
662 setOperationAction(ISD::FSIN , MVT::f80, Expand);
663 setOperationAction(ISD::FCOS , MVT::f80, Expand);
664 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
665
666 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
667 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
668 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
669 setOperationAction(ISD::FRINT, MVT::f80, Expand);
670 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
671 setOperationAction(ISD::FMA, MVT::f80, Expand);
672 setOperationAction(ISD::LROUND, MVT::f80, Expand);
673 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
674 setOperationAction(ISD::LRINT, MVT::f80, Custom);
675 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
676
677 // Handle constrained floating-point operations of scalar.
678 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
679 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
680 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
681 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
682 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
683 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
684 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
685 // as Custom.
686 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
687 }
688
689 // f128 uses xmm registers, but most operations require libcalls.
690 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
691 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
692 : &X86::VR128RegClass);
693
694 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
695
696 setOperationAction(ISD::FADD, MVT::f128, LibCall);
697 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
698 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
699 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
700 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
701 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
702 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
703 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
704 setOperationAction(ISD::FMA, MVT::f128, LibCall);
705 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
706
707 setOperationAction(ISD::FABS, MVT::f128, Custom);
708 setOperationAction(ISD::FNEG, MVT::f128, Custom);
709 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
710
711 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
712 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
713 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
714 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
715 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
716 // No STRICT_FSINCOS
717 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
718 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
719
720 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
721 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
722 // We need to custom handle any FP_ROUND with an f128 input, but
723 // LegalizeDAG uses the result type to know when to run a custom handler.
724 // So we have to list all legal floating point result types here.
725 if (isTypeLegal(MVT::f32)) {
726 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
727 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
728 }
729 if (isTypeLegal(MVT::f64)) {
730 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
731 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
732 }
733 if (isTypeLegal(MVT::f80)) {
734 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
735 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
736 }
737
738 setOperationAction(ISD::SETCC, MVT::f128, Custom);
739
740 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
741 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
742 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
743 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
744 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
745 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
746 }
747
748 // Always use a library call for pow.
749 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
750 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
751 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
752 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
753
754 setOperationAction(ISD::FLOG, MVT::f80, Expand);
755 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
756 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
757 setOperationAction(ISD::FEXP, MVT::f80, Expand);
758 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
759 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
760 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
761
762 // Some FP actions are always expanded for vector types.
763 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
764 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
765 setOperationAction(ISD::FSIN, VT, Expand);
766 setOperationAction(ISD::FSINCOS, VT, Expand);
767 setOperationAction(ISD::FCOS, VT, Expand);
768 setOperationAction(ISD::FREM, VT, Expand);
769 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
770 setOperationAction(ISD::FPOW, VT, Expand);
771 setOperationAction(ISD::FLOG, VT, Expand);
772 setOperationAction(ISD::FLOG2, VT, Expand);
773 setOperationAction(ISD::FLOG10, VT, Expand);
774 setOperationAction(ISD::FEXP, VT, Expand);
775 setOperationAction(ISD::FEXP2, VT, Expand);
776 }
777
778 // First set operation action for all vector types to either promote
779 // (for widening) or expand (for scalarization). Then we will selectively
780 // turn on ones that can be effectively codegen'd.
781 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
782 setOperationAction(ISD::SDIV, VT, Expand);
783 setOperationAction(ISD::UDIV, VT, Expand);
784 setOperationAction(ISD::SREM, VT, Expand);
785 setOperationAction(ISD::UREM, VT, Expand);
786 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
787 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
788 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
789 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
790 setOperationAction(ISD::FMA, VT, Expand);
791 setOperationAction(ISD::FFLOOR, VT, Expand);
792 setOperationAction(ISD::FCEIL, VT, Expand);
793 setOperationAction(ISD::FTRUNC, VT, Expand);
794 setOperationAction(ISD::FRINT, VT, Expand);
795 setOperationAction(ISD::FNEARBYINT, VT, Expand);
796 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
797 setOperationAction(ISD::MULHS, VT, Expand);
798 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
799 setOperationAction(ISD::MULHU, VT, Expand);
800 setOperationAction(ISD::SDIVREM, VT, Expand);
801 setOperationAction(ISD::UDIVREM, VT, Expand);
802 setOperationAction(ISD::CTPOP, VT, Expand);
803 setOperationAction(ISD::CTTZ, VT, Expand);
804 setOperationAction(ISD::CTLZ, VT, Expand);
805 setOperationAction(ISD::ROTL, VT, Expand);
806 setOperationAction(ISD::ROTR, VT, Expand);
807 setOperationAction(ISD::BSWAP, VT, Expand);
808 setOperationAction(ISD::SETCC, VT, Expand);
809 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
810 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
811 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
812 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
813 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
814 setOperationAction(ISD::TRUNCATE, VT, Expand);
815 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
816 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
817 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
818 setOperationAction(ISD::SELECT_CC, VT, Expand);
819 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
820 setTruncStoreAction(InnerVT, VT, Expand);
821
822 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
823 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
824
825 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
826 // types, we have to deal with them whether we ask for Expansion or not.
827 // Setting Expand causes its own optimisation problems though, so leave
828 // them legal.
829 if (VT.getVectorElementType() == MVT::i1)
830 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
831
832 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
833 // split/scalarized right now.
834 if (VT.getVectorElementType() == MVT::f16)
835 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
836 }
837 }
838
839 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
840 // with -msoft-float, disable use of MMX as well.
841 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
842 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
843 // No operations on x86mmx supported, everything uses intrinsics.
844 }
845
846 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
847 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
848 : &X86::VR128RegClass);
849
850 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
851 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
852 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
853 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
854 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
855 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
856 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
857 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
858
859 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
860 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
861
862 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
863 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
864 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
865 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
866 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
867 }
868
869 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
870 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
871 : &X86::VR128RegClass);
872
873 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
874 // registers cannot be used even for integer operations.
875 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
876 : &X86::VR128RegClass);
877 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
878 : &X86::VR128RegClass);
879 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
880 : &X86::VR128RegClass);
881 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
882 : &X86::VR128RegClass);
883
884 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
885 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
886 setOperationAction(ISD::SDIV, VT, Custom);
887 setOperationAction(ISD::SREM, VT, Custom);
888 setOperationAction(ISD::UDIV, VT, Custom);
889 setOperationAction(ISD::UREM, VT, Custom);
890 }
891
892 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
893 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
894 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
895
896 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
897 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
898 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
899 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
900 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
901 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
902 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
903 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
904 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
905 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
906 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
907 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
908 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
909
910 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
911 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
912 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
913 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
914 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
915 }
916
917 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
918 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
919 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
920 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
921 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
922 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
923 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
924 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
925 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
926 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
927 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
928 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
929
930 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
931 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
932 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
933
934 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
935 setOperationAction(ISD::SETCC, VT, Custom);
936 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
937 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
938 setOperationAction(ISD::CTPOP, VT, Custom);
939 setOperationAction(ISD::ABS, VT, Custom);
940
941 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
942 // setcc all the way to isel and prefer SETGT in some isel patterns.
943 setCondCodeAction(ISD::SETLT, VT, Custom);
944 setCondCodeAction(ISD::SETLE, VT, Custom);
945 }
946
947 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
948 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
949 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
950 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
951 setOperationAction(ISD::VSELECT, VT, Custom);
952 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
953 }
954
955 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
956 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
957 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
958 setOperationAction(ISD::VSELECT, VT, Custom);
959
960 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
961 continue;
962
963 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
964 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
965 }
966
967 // Custom lower v2i64 and v2f64 selects.
968 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
969 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
970 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
971 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
972 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
973
974 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
975 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
976 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
977 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
978
979 // Custom legalize these to avoid over promotion or custom promotion.
980 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
981 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
982 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
983 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
984 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
985 }
986
987 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
988 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
989 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
990 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
991
992 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
993 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
994
995 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
996 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
997
998 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
999 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1000 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1001 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1002 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1003
1004 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1005 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1006 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1007 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1008
1009 // We want to legalize this to an f64 load rather than an i64 load on
1010 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1011 // store.
1012 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1013 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1014 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1015 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1016 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1017 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1018
1019 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1020 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1021 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1022 if (!Subtarget.hasAVX512())
1023 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1024
1025 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1026 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1027 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1028
1029 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1030
1031 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1032 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1033 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1034 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1035 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1036 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1037
1038 // In the customized shift lowering, the legal v4i32/v2i64 cases
1039 // in AVX2 will be recognized.
1040 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1041 setOperationAction(ISD::SRL, VT, Custom);
1042 setOperationAction(ISD::SHL, VT, Custom);
1043 setOperationAction(ISD::SRA, VT, Custom);
1044 }
1045
1046 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1047 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1048
1049 // With 512-bit registers or AVX512VL+BW, expanding (and promoting the
1050 // shifts) is better.
1051 if (!Subtarget.useAVX512Regs() &&
1052 !(Subtarget.hasBWI() && Subtarget.hasVLX()))
1053 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1054
1055 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1056 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1057 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1058 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1059 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1060 }
1061
1062 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1063 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1064 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1065 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1066 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1067 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1068 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1069 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1070 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1071
1072 // These might be better off as horizontal vector ops.
1073 setOperationAction(ISD::ADD, MVT::i16, Custom);
1074 setOperationAction(ISD::ADD, MVT::i32, Custom);
1075 setOperationAction(ISD::SUB, MVT::i16, Custom);
1076 setOperationAction(ISD::SUB, MVT::i32, Custom);
1077 }
1078
1079 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1080 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1081 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1082 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1083 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1084 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1085 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1086 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1087 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1088 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1089 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1090 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1091 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1092 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1093
1094 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1095 }
1096
1097 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1098 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1099 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1100 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1101 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1102 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1103 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1104 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1105
1106 // FIXME: Do we need to handle scalar-to-vector here?
1107 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1108
1109 // We directly match byte blends in the backend as they match the VSELECT
1110 // condition form.
1111 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1112
1113 // SSE41 brings specific instructions for doing vector sign extend even in
1114 // cases where we don't have SRA.
1115 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1116 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1117 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1118 }
1119
1120 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1121 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1122 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1123 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1124 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1125 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1126 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1127 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1128 }
1129
1130 // i8 vectors are custom because the source register and source
1131 // memory operand types are not the same width.
1132 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1133
1134 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1135 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1136 // do the pre and post work in the vector domain.
1137 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1138 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1139 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1140 // so that DAG combine doesn't try to turn it into uint_to_fp.
1141 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1142 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1143 }
1144 }
1145
1146 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1147 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1148 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1149 setOperationAction(ISD::ROTL, VT, Custom);
1150
1151 // XOP can efficiently perform BITREVERSE with VPPERM.
1152 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1153 setOperationAction(ISD::BITREVERSE, VT, Custom);
1154
1155 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1156 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1157 setOperationAction(ISD::BITREVERSE, VT, Custom);
1158 }
1159
1160 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1161 bool HasInt256 = Subtarget.hasInt256();
1162
1163 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1164 : &X86::VR256RegClass);
1165 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1166 : &X86::VR256RegClass);
1167 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1168 : &X86::VR256RegClass);
1169 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1170 : &X86::VR256RegClass);
1171 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1172 : &X86::VR256RegClass);
1173 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1174 : &X86::VR256RegClass);
1175
1176 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1177 setOperationAction(ISD::FFLOOR, VT, Legal);
1178 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1179 setOperationAction(ISD::FCEIL, VT, Legal);
1180 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1181 setOperationAction(ISD::FTRUNC, VT, Legal);
1182 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1183 setOperationAction(ISD::FRINT, VT, Legal);
1184 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1185 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1186 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1187 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1188 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1189
1190 setOperationAction(ISD::FROUND, VT, Custom);
1191
1192 setOperationAction(ISD::FNEG, VT, Custom);
1193 setOperationAction(ISD::FABS, VT, Custom);
1194 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1195 }
1196
1197 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1198 // even though v8i16 is a legal type.
1199 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1200 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1201 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1202 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1203 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1204 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
1205
1206 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1207 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1208
1209 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1210 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1211 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1212 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1213 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1214 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1215 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1216 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1217 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1218 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1219 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1220 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1221
1222 if (!Subtarget.hasAVX512())
1223 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1224
1225 // In the customized shift lowering, the legal v8i32/v4i64 cases
1226 // in AVX2 will be recognized.
1227 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1228 setOperationAction(ISD::SRL, VT, Custom);
1229 setOperationAction(ISD::SHL, VT, Custom);
1230 setOperationAction(ISD::SRA, VT, Custom);
1231 }
1232
1233 // These types need custom splitting if their input is a 128-bit vector.
1234 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1235 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1236 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1237 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1238
1239 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1240 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1241
1242 // With BWI, expanding (and promoting the shifts) is better.
1243 if (!Subtarget.useBWIRegs())
1244 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1245
1246 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1247 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1248 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1249 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1250 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1251 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1252
1253 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1254 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1255 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1256 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1257 }
1258
1259 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1260 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1261 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1262 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1263
1264 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1265 setOperationAction(ISD::SETCC, VT, Custom);
1266 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1267 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1268 setOperationAction(ISD::CTPOP, VT, Custom);
1269 setOperationAction(ISD::CTLZ, VT, Custom);
1270
1271 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1272 // setcc all the way to isel and prefer SETGT in some isel patterns.
1273 setCondCodeAction(ISD::SETLT, VT, Custom);
1274 setCondCodeAction(ISD::SETLE, VT, Custom);
1275 }
1276
1277 if (Subtarget.hasAnyFMA()) {
1278 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1279 MVT::v2f64, MVT::v4f64 }) {
1280 setOperationAction(ISD::FMA, VT, Legal);
1281 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1282 }
1283 }
1284
1285 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1286 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1287 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1288 }
1289
1290 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1291 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1292 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1293 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1294
1295 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1296 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1297 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1298 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1299 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1300 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1301
1302 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1303 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1304 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1305 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1306 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1307
1308 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1309 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1310 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1311 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1312 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1313 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1314 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1315 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1316 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1317 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1318 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1319 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1320
1321 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1322 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1323 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1324 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1325 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1326 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1327 }
1328
1329 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1330 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1331 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1332 }
1333
1334 if (HasInt256) {
1335 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1336 // when we have a 256-bit-wide blend with immediate.
1337 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1338 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1339
1340 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1341 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1342 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1343 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1344 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1345 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1346 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1347 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1348 }
1349 }
1350
1351 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1352 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1353 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1354 setOperationAction(ISD::MSTORE, VT, Legal);
1355 }
1356
1357 // Extract subvector is special because the value type
1358 // (result) is 128-bit but the source is 256-bit wide.
1359 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1360 MVT::v4f32, MVT::v2f64 }) {
1361 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1362 }
1363
1364 // Custom lower several nodes for 256-bit types.
1365 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1366 MVT::v8f32, MVT::v4f64 }) {
1367 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1368 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1369 setOperationAction(ISD::VSELECT, VT, Custom);
1370 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1371 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1372 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1373 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1374 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1375 setOperationAction(ISD::STORE, VT, Custom);
1376 }
1377
1378 if (HasInt256) {
1379 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1380
1381 // Custom legalize 2x32 to get a little better code.
1382 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1383 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1384
1385 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1386 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1387 setOperationAction(ISD::MGATHER, VT, Custom);
1388 }
1389 }
1390
1391 // This block controls legalization of the mask vector sizes that are
1392 // available with AVX512. 512-bit vectors are in a separate block controlled
1393 // by useAVX512Regs.
1394 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1395 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1396 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1397 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1398 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1399 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1400
1401 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1402 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1403 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1404
1405 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1406 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1407 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1408 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1409 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1410 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1411 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1412 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1413 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1414 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1415 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1416 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1417
1418 // There is no byte sized k-register load or store without AVX512DQ.
1419 if (!Subtarget.hasDQI()) {
1420 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1421 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1422 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1423 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1424
1425 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1426 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1427 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1428 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1429 }
1430
1431 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1432 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1433 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1434 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1435 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1436 }
1437
1438 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1439 setOperationAction(ISD::ADD, VT, Custom);
1440 setOperationAction(ISD::SUB, VT, Custom);
1441 setOperationAction(ISD::MUL, VT, Custom);
1442 setOperationAction(ISD::UADDSAT, VT, Custom);
1443 setOperationAction(ISD::SADDSAT, VT, Custom);
1444 setOperationAction(ISD::USUBSAT, VT, Custom);
1445 setOperationAction(ISD::SSUBSAT, VT, Custom);
1446 setOperationAction(ISD::VSELECT, VT, Expand);
1447 }
1448
1449 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1450 setOperationAction(ISD::SETCC, VT, Custom);
1451 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1452 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1453 setOperationAction(ISD::SELECT, VT, Custom);
1454 setOperationAction(ISD::TRUNCATE, VT, Custom);
1455
1456 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1457 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1458 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1459 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1460 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1461 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1462 }
1463
1464 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1465 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1466 }
1467
1468 // This block controls legalization for 512-bit operations with 32/64 bit
1469 // elements. 512-bits can be disabled based on prefer-vector-width and
1470 // required-vector-width function attributes.
1471 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1472 bool HasBWI = Subtarget.hasBWI();
1473
1474 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1475 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1476 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1477 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1478 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1479 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1480
1481 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1482 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1483 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1484 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1485 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1486 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1487 if (HasBWI)
1488 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1489 }
1490
1491 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1492 setOperationAction(ISD::FNEG, VT, Custom);
1493 setOperationAction(ISD::FABS, VT, Custom);
1494 setOperationAction(ISD::FMA, VT, Legal);
1495 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1496 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1497 }
1498
1499 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1500 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1501 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1502 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1503 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1504 }
1505 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1506 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1507 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1508 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1509 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1510 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1511 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1512 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1513
1514 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1515 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1516 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1517 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1518 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1519 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1520 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1521 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1522 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1523 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1524 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1525 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1526
1527 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1528 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1529 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1530 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1531 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1532 if (HasBWI)
1533 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1534
1535 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1536 // to 512-bit rather than use the AVX2 instructions so that we can use
1537 // k-masks.
1538 if (!Subtarget.hasVLX()) {
1539 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1540 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1541 setOperationAction(ISD::MLOAD, VT, Custom);
1542 setOperationAction(ISD::MSTORE, VT, Custom);
1543 }
1544 }
1545
1546 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1547 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1548 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1549 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1550 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1551 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1552 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1553 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1554 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1555 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1556 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1557 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1558 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1559
1560 if (HasBWI) {
1561 // Extends from v64i1 masks to 512-bit vectors.
1562 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1563 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1564 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1565 }
1566
1567 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1568 setOperationAction(ISD::FFLOOR, VT, Legal);
1569 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1570 setOperationAction(ISD::FCEIL, VT, Legal);
1571 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1572 setOperationAction(ISD::FTRUNC, VT, Legal);
1573 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1574 setOperationAction(ISD::FRINT, VT, Legal);
1575 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1576 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1577 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1578 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1579 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1580
1581 setOperationAction(ISD::FROUND, VT, Custom);
1582 }
1583
1584 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1585 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1586 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1587 }
1588
1589 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1590 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1591 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1592 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1593
1594 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1595 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1596 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1597 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1598
1599 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1600 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1601 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1602 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1603 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1604 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1605
1606 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1607
1608 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1609 setOperationAction(ISD::SRL, VT, Custom);
1610 setOperationAction(ISD::SHL, VT, Custom);
1611 setOperationAction(ISD::SRA, VT, Custom);
1612 setOperationAction(ISD::SETCC, VT, Custom);
1613
1614 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1615 // setcc all the way to isel and prefer SETGT in some isel patterns.
1616 setCondCodeAction(ISD::SETLT, VT, Custom);
1617 setCondCodeAction(ISD::SETLE, VT, Custom);
1618 }
1619 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1620 setOperationAction(ISD::SMAX, VT, Legal);
1621 setOperationAction(ISD::UMAX, VT, Legal);
1622 setOperationAction(ISD::SMIN, VT, Legal);
1623 setOperationAction(ISD::UMIN, VT, Legal);
1624 setOperationAction(ISD::ABS, VT, Legal);
1625 setOperationAction(ISD::CTPOP, VT, Custom);
1626 setOperationAction(ISD::ROTL, VT, Custom);
1627 setOperationAction(ISD::ROTR, VT, Custom);
1628 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1629 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1630 }
1631
1632 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1633 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1634 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1635 setOperationAction(ISD::CTLZ, VT, Custom);
1636 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1637 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1638 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1639 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1640 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1641 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1642 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1643 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1644 }
1645
1646 if (Subtarget.hasDQI()) {
1647 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1648 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1649 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1650 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1651 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1652 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1653 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1654 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1655
1656 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1657 }
1658
1659 if (Subtarget.hasCDI()) {
1660 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1661 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1662 setOperationAction(ISD::CTLZ, VT, Legal);
1663 }
1664 } // Subtarget.hasCDI()
1665
1666 if (Subtarget.hasVPOPCNTDQ()) {
1667 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1668 setOperationAction(ISD::CTPOP, VT, Legal);
1669 }
1670
1671 // Extract subvector is special because the value type
1672 // (result) is 256-bit but the source is 512-bit wide.
1673 // 128-bit was made Legal under AVX1.
1674 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1675 MVT::v8f32, MVT::v4f64 })
1676 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1677
1678 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1679 MVT::v16f32, MVT::v8f64 }) {
1680 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1681 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1682 setOperationAction(ISD::SELECT, VT, Custom);
1683 setOperationAction(ISD::VSELECT, VT, Custom);
1684 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1685 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1686 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1687 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1688 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1689 }
1690
1691 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1692 setOperationAction(ISD::MLOAD, VT, Legal);
1693 setOperationAction(ISD::MSTORE, VT, Legal);
1694 setOperationAction(ISD::MGATHER, VT, Custom);
1695 setOperationAction(ISD::MSCATTER, VT, Custom);
1696 }
1697 if (HasBWI) {
1698 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1699 setOperationAction(ISD::MLOAD, VT, Legal);
1700 setOperationAction(ISD::MSTORE, VT, Legal);
1701 }
1702 } else {
1703 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1704 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1705 }
1706
1707 if (Subtarget.hasVBMI2()) {
1708 for (auto VT : { MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1709 setOperationAction(ISD::FSHL, VT, Custom);
1710 setOperationAction(ISD::FSHR, VT, Custom);
1711 }
1712 }
1713 } // useAVX512Regs
1714
1715 // This block controls legalization for operations that don't have
1716 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1717 // narrower widths.
1718 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1719 // These operations are handled on non-VLX by artificially widening in
1720 // isel patterns.
1721
1722 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1723 Subtarget.hasVLX() ? Legal : Custom);
1724 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1725 Subtarget.hasVLX() ? Legal : Custom);
1726 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1727 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1728 Subtarget.hasVLX() ? Legal : Custom);
1729 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1730 Subtarget.hasVLX() ? Legal : Custom);
1731 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1732 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1733 Subtarget.hasVLX() ? Legal : Custom);
1734 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1735 Subtarget.hasVLX() ? Legal : Custom);
1736 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1737 Subtarget.hasVLX() ? Legal : Custom);
1738 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1739 Subtarget.hasVLX() ? Legal : Custom);
1740
1741 if (Subtarget.hasDQI()) {
1742 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1743 // v2f32 UINT_TO_FP is already custom under SSE2.
1744 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
1745        isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
1746        "Unexpected operation action!");
1747 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1748 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1749 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1750 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1751 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1752 }
1753
1754 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1755 setOperationAction(ISD::SMAX, VT, Legal);
1756 setOperationAction(ISD::UMAX, VT, Legal);
1757 setOperationAction(ISD::SMIN, VT, Legal);
1758 setOperationAction(ISD::UMIN, VT, Legal);
1759 setOperationAction(ISD::ABS, VT, Legal);
1760 }
1761
1762 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1763 setOperationAction(ISD::ROTL, VT, Custom);
1764 setOperationAction(ISD::ROTR, VT, Custom);
1765 }
1766
1767 // Custom legalize 2x32 to get a little better code.
1768 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1769 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1770
1771 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1772 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1773 setOperationAction(ISD::MSCATTER, VT, Custom);
1774
1775 if (Subtarget.hasDQI()) {
1776 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1777 setOperationAction(ISD::SINT_TO_FP, VT,
1778 Subtarget.hasVLX() ? Legal : Custom);
1779 setOperationAction(ISD::UINT_TO_FP, VT,
1780 Subtarget.hasVLX() ? Legal : Custom);
1781 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1782 Subtarget.hasVLX() ? Legal : Custom);
1783 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1784 Subtarget.hasVLX() ? Legal : Custom);
1785 setOperationAction(ISD::FP_TO_SINT, VT,
1786 Subtarget.hasVLX() ? Legal : Custom);
1787 setOperationAction(ISD::FP_TO_UINT, VT,
1788 Subtarget.hasVLX() ? Legal : Custom);
1789 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1790 Subtarget.hasVLX() ? Legal : Custom);
1791 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1792 Subtarget.hasVLX() ? Legal : Custom);
1793 setOperationAction(ISD::MUL, VT, Legal);
1794 }
1795 }
1796
1797 if (Subtarget.hasCDI()) {
1798 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1799 setOperationAction(ISD::CTLZ, VT, Legal);
1800 }
1801 } // Subtarget.hasCDI()
1802
1803 if (Subtarget.hasVPOPCNTDQ()) {
1804 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1805 setOperationAction(ISD::CTPOP, VT, Legal);
1806 }
1807 }
1808
1809 // This block controls legalization of v32i1/v64i1 which are available with
1810 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1811 // useBWIRegs.
1812 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1813 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1814 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1815
1816 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1817 setOperationAction(ISD::ADD, VT, Custom);
1818 setOperationAction(ISD::SUB, VT, Custom);
1819 setOperationAction(ISD::MUL, VT, Custom);
1820 setOperationAction(ISD::VSELECT, VT, Expand);
1821 setOperationAction(ISD::UADDSAT, VT, Custom);
1822 setOperationAction(ISD::SADDSAT, VT, Custom);
1823 setOperationAction(ISD::USUBSAT, VT, Custom);
1824 setOperationAction(ISD::SSUBSAT, VT, Custom);
1825
1826 setOperationAction(ISD::TRUNCATE, VT, Custom);
1827 setOperationAction(ISD::SETCC, VT, Custom);
1828 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1829 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1830 setOperationAction(ISD::SELECT, VT, Custom);
1831 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1832 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1833 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1834 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1835 }
1836
1837 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1838 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1839
1840 // Extends from v32i1 masks to 256-bit vectors.
1841 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1842 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1843 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1844
1845 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1846 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1847 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1848 }
1849
1850 // These operations are handled on non-VLX by artificially widening in
1851 // isel patterns.
1852 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1853
1854 if (Subtarget.hasBITALG()) {
1855 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1856 setOperationAction(ISD::CTPOP, VT, Legal);
1857 }
1858 }
1859
1860 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1861 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1862 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1863 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1864 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1865 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1866
1867 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1868 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1869 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1870 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1871 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1872
1873 if (Subtarget.hasBWI()) {
1874 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1875 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1876 }
1877
1878 if (Subtarget.hasVBMI2()) {
1879 // TODO: Make these legal even without VLX?
1880 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1881 MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1882 setOperationAction(ISD::FSHL, VT, Custom);
1883 setOperationAction(ISD::FSHR, VT, Custom);
1884 }
1885 }
1886
1887 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
1888 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
1889 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1890 }
1891
1892 // We want to custom lower some of our intrinsics.
1893 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1894 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1895 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1896 if (!Subtarget.is64Bit()) {
1897 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1898 }
1899
1900 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1901 // handle type legalization for these operations here.
1902 //
1903 // FIXME: We really should do custom legalization for addition and
1904 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1905 // than generic legalization for 64-bit multiplication-with-overflow, though.
1906 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1907 if (VT == MVT::i64 && !Subtarget.is64Bit())
1908 continue;
1909 // Add/Sub/Mul with overflow operations are custom lowered.
1910 setOperationAction(ISD::SADDO, VT, Custom);
1911 setOperationAction(ISD::UADDO, VT, Custom);
1912 setOperationAction(ISD::SSUBO, VT, Custom);
1913 setOperationAction(ISD::USUBO, VT, Custom);
1914 setOperationAction(ISD::SMULO, VT, Custom);
1915 setOperationAction(ISD::UMULO, VT, Custom);
1916
1917 // Support carry in as value rather than glue.
1918 setOperationAction(ISD::ADDCARRY, VT, Custom);
1919 setOperationAction(ISD::SUBCARRY, VT, Custom);
1920 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1921 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
1922 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
1923 }
1924
1925 if (!Subtarget.is64Bit()) {
1926 // These libcalls are not available in 32-bit.
1927 setLibcallName(RTLIB::SHL_I128, nullptr);
1928 setLibcallName(RTLIB::SRL_I128, nullptr);
1929 setLibcallName(RTLIB::SRA_I128, nullptr);
1930 setLibcallName(RTLIB::MUL_I128, nullptr);
1931 }
1932
1933 // Combine sin / cos into _sincos_stret if it is available.
1934 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1935 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1936 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1937 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1938 }
1939
1940 if (Subtarget.isTargetWin64()) {
1941 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1942 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1943 setOperationAction(ISD::SREM, MVT::i128, Custom);
1944 setOperationAction(ISD::UREM, MVT::i128, Custom);
1945 }
1946
1947 // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)` is.
1948 // We should promote the value to 64 bits to solve this. This is what the
1949 // CRT headers do - `fmodf` is an inline header function casting to f64 and
1950 // calling `fmod` (see the sketch after the loop below).
1951 if (Subtarget.is32Bit() &&
1952 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
1953 for (ISD::NodeType Op :
1954 {ISD::FCEIL, ISD::STRICT_FCEIL,
1955 ISD::FCOS, ISD::STRICT_FCOS,
1956 ISD::FEXP, ISD::STRICT_FEXP,
1957 ISD::FFLOOR, ISD::STRICT_FFLOOR,
1958 ISD::FREM, ISD::STRICT_FREM,
1959 ISD::FLOG, ISD::STRICT_FLOG,
1960 ISD::FLOG10, ISD::STRICT_FLOG10,
1961 ISD::FPOW, ISD::STRICT_FPOW,
1962 ISD::FSIN, ISD::STRICT_FSIN})
1963 if (isOperationExpand(Op, MVT::f32))
1964 setOperationAction(Op, MVT::f32, Promote);
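
A minimal sketch of the CRT-header behavior the comment above refers to, assuming only what that comment states; the helper name is illustrative, not the actual MSVC header text:

    #include <math.h>

    // 32-bit MSVC headers implement fmodf inline by widening to double,
    // calling fmod, and narrowing the result back to float.
    static inline float fmodf_sketch(float X, float Y) {
      return (float)fmod((double)X, (double)Y);
    }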
1965
1966 // We have target-specific dag combine patterns for the following nodes:
1967 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1968 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1969 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
1970 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1971 setTargetDAGCombine(ISD::CONCAT_VECTORS);
1972 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1973 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1974 setTargetDAGCombine(ISD::BITCAST);
1975 setTargetDAGCombine(ISD::VSELECT);
1976 setTargetDAGCombine(ISD::SELECT);
1977 setTargetDAGCombine(ISD::SHL);
1978 setTargetDAGCombine(ISD::SRA);
1979 setTargetDAGCombine(ISD::SRL);
1980 setTargetDAGCombine(ISD::OR);
1981 setTargetDAGCombine(ISD::AND);
1982 setTargetDAGCombine(ISD::ADD);
1983 setTargetDAGCombine(ISD::FADD);
1984 setTargetDAGCombine(ISD::FSUB);
1985 setTargetDAGCombine(ISD::FNEG);
1986 setTargetDAGCombine(ISD::FMA);
1987 setTargetDAGCombine(ISD::STRICT_FMA);
1988 setTargetDAGCombine(ISD::FMINNUM);
1989 setTargetDAGCombine(ISD::FMAXNUM);
1990 setTargetDAGCombine(ISD::SUB);
1991 setTargetDAGCombine(ISD::LOAD);
1992 setTargetDAGCombine(ISD::MLOAD);
1993 setTargetDAGCombine(ISD::STORE);
1994 setTargetDAGCombine(ISD::MSTORE);
1995 setTargetDAGCombine(ISD::TRUNCATE);
1996 setTargetDAGCombine(ISD::ZERO_EXTEND);
1997 setTargetDAGCombine(ISD::ANY_EXTEND);
1998 setTargetDAGCombine(ISD::SIGN_EXTEND);
1999 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
2000 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
2001 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
2002 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
2003 setTargetDAGCombine(ISD::SINT_TO_FP);
2004 setTargetDAGCombine(ISD::UINT_TO_FP);
2005 setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
2006 setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
2007 setTargetDAGCombine(ISD::SETCC);
2008 setTargetDAGCombine(ISD::MUL);
2009 setTargetDAGCombine(ISD::XOR);
2010 setTargetDAGCombine(ISD::MSCATTER);
2011 setTargetDAGCombine(ISD::MGATHER);
2012 setTargetDAGCombine(ISD::FP16_TO_FP);
2013 setTargetDAGCombine(ISD::FP_EXTEND);
2014 setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
2015 setTargetDAGCombine(ISD::FP_ROUND);
2016
2017 computeRegisterProperties(Subtarget.getRegisterInfo());
2018
2019 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2020 MaxStoresPerMemsetOptSize = 8;
2021 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2022 MaxStoresPerMemcpyOptSize = 4;
2023 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2024 MaxStoresPerMemmoveOptSize = 4;
2025
2026 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2027 // that needs to be benchmarked and balanced with the potential use of vector
2028 // load/store types (PR33329, PR33914).
2029 MaxLoadsPerMemcmp = 2;
2030 MaxLoadsPerMemcmpOptSize = 2;
2031
2032 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
2033 setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
2034
2035 // An out-of-order CPU can speculatively execute past a predictable branch,
2036 // but a conditional move could be stalled by an expensive earlier operation.
2037 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2038 EnableExtLdPromotion = true;
2039 setPrefFunctionAlignment(Align(16));
2040
2041 verifyIntrinsicTables();
2042
2043 // Default to having -disable-strictnode-mutation on
2044 IsStrictFPEnabled = true;
2045}
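
The constructor that ends here is, in essence, one long table of legalization decisions. As a minimal sketch of that pattern, assuming a hypothetical MyTargetLowering, MySubtarget and My::GPRRegClass (the hooks themselves - addRegisterClass, setOperationAction, setLoadExtAction, computeRegisterProperties - are the same ones used above):

    MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                       const MySubtarget &STI)
        : TargetLowering(TM), Subtarget(STI) {
      // 1. Declare which MVTs live in which register classes.
      addRegisterClass(MVT::i32, &My::GPRRegClass);

      // 2. Classify (opcode, type) pairs: Legal, Custom, Promote or Expand.
      setOperationAction(ISD::MUL, MVT::i64, Custom);  // handled in LowerOperation
      setOperationAction(ISD::ROTL, MVT::i32, Expand); // legalizer rewrites it

      // 3. Describe the extending loads the ISA supports natively.
      setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, MVT::i8, Legal);

      // 4. Let generic code derive register properties from the above.
      computeRegisterProperties(Subtarget.getRegisterInfo());
    }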
2046
2047// This has so far only been implemented for 64-bit MachO.
2048bool X86TargetLowering::useLoadStackGuardNode() const {
2049 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2050}
2051
2052bool X86TargetLowering::useStackGuardXorFP() const {
2053 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2054 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2055}
2056
2057SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2058 const SDLoc &DL) const {
2059 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2060 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2061 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2062 return SDValue(Node, 0);
2063}
2064
2065TargetLoweringBase::LegalizeTypeAction
2066X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2067 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2068 !Subtarget.hasBWI())
2069 return TypeSplitVector;
2070
2071 if (VT.getVectorNumElements() != 1 &&
2072 VT.getVectorElementType() != MVT::i1)
2073 return TypeWidenVector;
2074
2075 return TargetLoweringBase::getPreferredVectorAction(VT);
2076}
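
Hedged worked examples of how the rules above classify a few vector types; TLI is assumed to be an X86TargetLowering built for an AVX512 subtarget without BWI:

    void preferredActionExamples(const X86TargetLowering &TLI) {
      auto A1 = TLI.getPreferredVectorAction(MVT::v32i1); // TypeSplitVector (first rule)
      auto A2 = TLI.getPreferredVectorAction(MVT::v3i32); // TypeWidenVector (non-i1, >1 element)
      auto A3 = TLI.getPreferredVectorAction(MVT::v1i64); // generic TargetLoweringBase default
      (void)A1; (void)A2; (void)A3;
    }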
2077
2078static std::pair<MVT, unsigned>
2079handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2080 const X86Subtarget &Subtarget) {
2081 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2082 // convention is one that uses k registers.
2083 if (NumElts == 2)
2084 return {MVT::v2i64, 1};
2085 if (NumElts == 4)
2086 return {MVT::v4i32, 1};
2087 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2088 CC != CallingConv::Intel_OCL_BI)
2089 return {MVT::v8i16, 1};
2090 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2091 CC != CallingConv::Intel_OCL_BI)
2092 return {MVT::v16i8, 1};
2093 // v32i1 passes in ymm unless we have BWI and the calling convention is
2094 // regcall.
2095 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2096 return {MVT::v32i8, 1};
2097 // Split v64i1 vectors if we don't have v64i8 available.
2098 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2099 if (Subtarget.useAVX512Regs())
2100 return {MVT::v64i8, 1};
2101 return {MVT::v32i8, 2};
2102 }
2103
2104 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2105 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2106 NumElts > 64)
2107 return {MVT::i8, NumElts};
2108
2109 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2110}
2111
2112MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2113 CallingConv::ID CC,
2114 EVT VT) const {
2115 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2116 Subtarget.hasAVX512()) {
2117 unsigned NumElts = VT.getVectorNumElements();
2118
2119 MVT RegisterVT;
2120 unsigned NumRegisters;
2121 std::tie(RegisterVT, NumRegisters) =
2122 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2123 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2124 return RegisterVT;
2125 }
2126
2127 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2128}
2129
2130unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2131 CallingConv::ID CC,
2132 EVT VT) const {
2133 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2134 Subtarget.hasAVX512()) {
2135 unsigned NumElts = VT.getVectorNumElements();
2136
2137 MVT RegisterVT;
2138 unsigned NumRegisters;
2139 std::tie(RegisterVT, NumRegisters) =
2140 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2141 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2142 return NumRegisters;
2143 }
2144
2145 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2146}
2147
2148unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2149 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2150 unsigned &NumIntermediates, MVT &RegisterVT) const {
2151 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2152 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2153 Subtarget.hasAVX512() &&
2154 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2155 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2156 VT.getVectorNumElements() > 64)) {
2157 RegisterVT = MVT::i8;
2158 IntermediateVT = MVT::i1;
2159 NumIntermediates = VT.getVectorNumElements();
2160 return NumIntermediates;
2161 }
2162
2163 // Split v64i1 vectors if we don't have v64i8 available.
2164 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2165 CC != CallingConv::X86_RegCall) {
2166 RegisterVT = MVT::v32i8;
2167 IntermediateVT = MVT::v32i1;
2168 NumIntermediates = 2;
2169 return 2;
2170 }
2171
2172 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2173 NumIntermediates, RegisterVT);
2174}
2175
2176EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2177 LLVMContext& Context,
2178 EVT VT) const {
2179 if (!VT.isVector())
2180 return MVT::i8;
2181
2182 if (Subtarget.hasAVX512()) {
2183 const unsigned NumElts = VT.getVectorNumElements();
2184
2185 // Figure out what this type will be legalized to.
2186 EVT LegalVT = VT;
2187 while (getTypeAction(Context, LegalVT) != TypeLegal)
2188 LegalVT = getTypeToTransformTo(Context, LegalVT);
2189
2190 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2191 if (LegalVT.getSimpleVT().is512BitVector())
2192 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2193
2194 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2195 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2196 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2197 // vXi16/vXi8.
2198 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2199 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2200 return EVT::getVectorVT(Context, MVT::i1, NumElts);
2201 }
2202 }
2203
2204 return VT.changeVectorElementTypeToInteger();
2205}
2206
2207/// Helper for getByValTypeAlignment to determine
2208/// the desired ByVal argument alignment.
2209static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2210 if (MaxAlign == 16)
2211 return;
2212 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2213 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2214 MaxAlign = Align(16);
2215 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2216 Align EltAlign;
2217 getMaxByValAlign(ATy->getElementType(), EltAlign);
2218 if (EltAlign > MaxAlign)
2219 MaxAlign = EltAlign;
2220 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2221 for (auto *EltTy : STy->elements()) {
2222 Align EltAlign;
2223 getMaxByValAlign(EltTy, EltAlign);
2224 if (EltAlign > MaxAlign)
2225 MaxAlign = EltAlign;
2226 if (MaxAlign == 16)
2227 break;
2228 }
2229 }
2230}
2231
2232/// Return the desired alignment for ByVal aggregate
2233/// function arguments in the caller parameter area. For X86, aggregates
2234/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2235/// are at 4-byte boundaries.
2236unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2237 const DataLayout &DL) const {
2238 if (Subtarget.is64Bit()) {
2239 // Max of 8 and alignment of type.
2240 Align TyAlign = DL.getABITypeAlign(Ty);
2241 if (TyAlign > 8)
2242 return TyAlign.value();
2243 return 8;
2244 }
2245
2246 Align Alignment(4);
2247 if (Subtarget.hasSSE1())
2248 getMaxByValAlign(Ty, Alignment);
2249 return Alignment.value();
2250}
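
A hedged, source-level illustration of the 32-bit rule above (assuming a 32-bit x86 target with SSE and byval aggregates); the structs are hypothetical, and what matters is whether the recursion in getMaxByValAlign finds a 128-bit vector type inside the aggregate:

    #include <xmmintrin.h> // __m128, which lowers to the 128-bit IR type <4 x float>

    struct NoVec  { int a, b; };             // no vector anywhere   -> 4-byte byval alignment
    struct HasVec { __m128 v; int a; };      // 128-bit vector field -> 16-byte byval alignment
    struct Nested { HasVec inner; char c; }; // found via recursion  -> 16-byte byval alignment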
2251
2252/// It returns EVT::Other if the type should be determined using generic
2253/// target-independent logic.
2254/// For vector ops we check that the overall size isn't larger than our
2255/// preferred vector width.
2256EVT X86TargetLowering::getOptimalMemOpType(
2257 const MemOp &Op, const AttributeList &FuncAttributes) const {
2258 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2259 if (Op.size() >= 16 &&
2260 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2261 // FIXME: Check if unaligned 64-byte accesses are slow.
2262 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2263 (Subtarget.getPreferVectorWidth() >= 512)) {
2264 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2265 }
2266 // FIXME: Check if unaligned 32-byte accesses are slow.
2267 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2268 (Subtarget.getPreferVectorWidth() >= 256)) {
2269 // Although this isn't a well-supported type for AVX1, we'll let
2270 // legalization and shuffle lowering produce the optimal codegen. If we
2271 // choose an optimal type with a vector element larger than a byte,
2272 // getMemsetStores() may create an intermediate splat (using an integer
2273 // multiply) before we splat as a vector.
2274 return MVT::v32i8;
2275 }
2276 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2277 return MVT::v16i8;
2278 // TODO: Can SSE1 handle a byte vector?
2279 // If we have SSE1 registers we should be able to use them.
2280 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2281 (Subtarget.getPreferVectorWidth() >= 128))
2282 return MVT::v4f32;
2283 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2284 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2285 // Do not use f64 to lower memcpy if source is string constant. It's
2286 // better to use i32 to avoid the loads.
2287 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2288 // The gymnastics of splatting a byte value into an XMM register and then
2289 // only using 8-byte stores (because this is a CPU with slow unaligned
2290 // 16-byte accesses) makes that a loser.
2291 return MVT::f64;
2292 }
2293 }
2294 // This is a compromise. If we reach here, unaligned accesses may be slow on
2295 // this target. However, creating smaller, aligned accesses could be even
2296 // slower and would certainly be a lot more code.
2297 if (Subtarget.is64Bit() && Op.size() >= 8)
2298 return MVT::i64;
2299 return MVT::i32;
2300}
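
A simplified, hedged restatement of the decision order above, for illustration only: it drops the NoImplicitFloat check, the unaligned-slow gates, the PreferVectorWidth checks and the SSE1/f64 special cases that the real function applies.

    #include "llvm/Support/MachineValueType.h"
    #include <cstdint>
    using namespace llvm;

    static MVT pickMemOpTypeSketch(uint64_t Size, bool HasAVX512, bool HasBWI,
                                   bool HasAVX, bool HasSSE2, bool Is64Bit) {
      if (Size >= 64 && HasAVX512)
        return HasBWI ? MVT::v64i8 : MVT::v16i32; // 512-bit ops
      if (Size >= 32 && HasAVX)
        return MVT::v32i8;                        // 256-bit ops
      if (Size >= 16 && HasSSE2)
        return MVT::v16i8;                        // 128-bit ops
      if (Is64Bit && Size >= 8)
        return MVT::i64;                          // scalar 64-bit fallback
      return MVT::i32;
    }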
2301
2302bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2303 if (VT == MVT::f32)
2304 return X86ScalarSSEf32;
2305 else if (VT == MVT::f64)
2306 return X86ScalarSSEf64;
2307 return true;
2308}
2309
2310bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2311 EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags,
2312 bool *Fast) const {
2313 if (Fast) {
2314 switch (VT.getSizeInBits()) {
2315 default:
2316 // 8-byte and under are always assumed to be fast.
2317 *Fast = true;
2318 break;
2319 case 128:
2320 *Fast = !Subtarget.isUnalignedMem16Slow();
2321 break;
2322 case 256:
2323 *Fast = !Subtarget.isUnalignedMem32Slow();
2324 break;
2325 // TODO: What about AVX-512 (512-bit) accesses?
2326 }
2327 }
2328 // NonTemporal vector memory ops must be aligned.
2329 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2330 // NT loads can only be vector aligned, so if it's less aligned than the
2331 // minimum vector size (which we can split the vector down to), we might as
2332 // well use a regular unaligned vector load.
2333 // We don't have any NT loads pre-SSE41.
2334 if (!!(Flags & MachineMemOperand::MOLoad))
2335 return (Align < 16 || !Subtarget.hasSSE41());
2336 return false;
2337 }
2338 // Misaligned accesses of any size are always allowed.
2339 return true;
2340}
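
Hedged worked examples of the rules above; TLI is again an assumed X86TargetLowering instance:

    void misalignedAccessExamples(const X86TargetLowering &TLI) {
      bool Fast = false;

      // Plain 128-bit access at 1-byte alignment: always allowed; Fast ends up
      // as !Subtarget.isUnalignedMem16Slow().
      TLI.allowsMisalignedMemoryAccesses(MVT::v4i32, /*AddrSpace=*/0, /*Align=*/1,
                                         MachineMemOperand::MONone, &Fast);

      // Non-temporal 128-bit *load* at 8-byte alignment: Align < 16, so this
      // returns true, i.e. fall back to a regular unaligned vector load. With
      // 16-byte alignment and SSE4.1 it would return false instead.
      TLI.allowsMisalignedMemoryAccesses(MVT::v4i32, /*AddrSpace=*/0, /*Align=*/8,
                                         MachineMemOperand::MONonTemporal |
                                             MachineMemOperand::MOLoad,
                                         &Fast);
    }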
2341
2342/// Return the entry encoding for a jump table in the
2343/// current function. The returned value is a member of the
2344/// MachineJumpTableInfo::JTEntryKind enum.
2345unsigned X86TargetLowering::getJumpTableEncoding() const {
2346 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2347 // symbol.
2348 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2349 return MachineJumpTableInfo::EK_Custom32;
2350
2351 // Otherwise, use the normal jump table encoding heuristics.
2352 return TargetLowering::getJumpTableEncoding();
2353}
2354
2355bool X86TargetLowering::useSoftFloat() const {
2356 return Subtarget.useSoftFloat();
2357}
2358
2359void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2360 ArgListTy &Args) const {
2361
2362 // Only relabel X86-32 for C / Stdcall CCs.
2363 if (Subtarget.is64Bit())
2364 return;
2365 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2366 return;
2367 unsigned ParamRegs = 0;
2368 if (auto *M = MF->getFunction().getParent())
2369 ParamRegs = M->getNumberRegisterParameters();
2370
2371 // Mark the first N integer arguments as being passed in registers.
2372 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2373 Type *T = Args[Idx].Ty;
2374 if (T->isIntOrPtrTy())
2375 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2376 unsigned numRegs = 1;
2377 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2378 numRegs = 2;
2379 if (ParamRegs < numRegs)
2380 return;
2381 ParamRegs -= numRegs;
2382 Args[Idx].IsInReg = true;
2383 }
2384 }
2385}
2386
2387const MCExpr *
2388X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2389 const MachineBasicBlock *MBB,
2390 unsigned uid,MCContext &Ctx) const{
2391 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
2392 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2393 // entries.
2394 return MCSymbolRefExpr::create(MBB->getSymbol(),
2395 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2396}
2397
2398/// Returns relocation base for the given PIC jumptable.
2399SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2400 SelectionDAG &DAG) const {
2401 if (!Subtarget.is64Bit())
2402 // This doesn't have SDLoc associated with it, but is not really the
2403 // same as a Register.
2404 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2405 getPointerTy(DAG.getDataLayout()));
2406 return Table;
2407}
2408
2409/// This returns the relocation base for the given PIC jumptable,
2410/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2411const MCExpr *X86TargetLowering::
2412getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2413 MCContext &Ctx) const {
2414 // X86-64 uses RIP relative addressing based on the jump table label.
2415 if (Subtarget.isPICStyleRIPRel())
2416 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2417
2418 // Otherwise, the reference is relative to the PIC base.
2419 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2420}
2421
2422std::pair<const TargetRegisterClass *, uint8_t>
2423X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2424 MVT VT) const {
2425 const TargetRegisterClass *RRC = nullptr;
2426 uint8_t Cost = 1;
2427 switch (VT.SimpleTy) {
2428 default:
2429 return TargetLowering::findRepresentativeClass(TRI, VT);
2430 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2431 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2432 break;
2433 case MVT::x86mmx:
2434 RRC = &X86::VR64RegClass;
2435 break;
2436 case MVT::f32: case MVT::f64:
2437 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2438 case MVT::v4f32: case MVT::v2f64:
2439 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2440 case MVT::v8f32: case MVT::v4f64:
2441 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2442 case MVT::v16f32: case MVT::v8f64:
2443 RRC = &X86::VR128XRegClass;
2444 break;
2445 }
2446 return std::make_pair(RRC, Cost);
2447}
2448
2449unsigned X86TargetLowering::getAddressSpace() const {
2450 if (Subtarget.is64Bit())
2451 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2452 return 256;
2453}
2454
2455static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2456 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2457 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2458}
2459
2460static Constant* SegmentOffset(IRBuilder<> &IRB,
2461 unsigned Offset, unsigned AddressSpace) {
2462 return ConstantExpr::getIntToPtr(
2463 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2464 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2465}
2466
2467Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2468 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2469 // tcbhead_t; use it instead of the usual global variable (see
2470 // sysdeps/{i386,x86_64}/nptl/tls.h)
2471 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2472 if (Subtarget.isTargetFuchsia()) {
2473 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2474 return SegmentOffset(IRB, 0x10, getAddressSpace());
2475 } else {
2476 unsigned AddressSpace = getAddressSpace();
2477      // In particular, users may customize the base register and offset.
2478 unsigned Offset = getTargetMachine().Options.StackProtectorGuardOffset;
2479 // If we don't set -stack-protector-guard-offset value:
2480 // %fs:0x28, unless we're using a Kernel code model, in which case
2481 // it's %gs:0x28. gs:0x14 on i386.
2482 if (Offset == (unsigned)-1)
2483 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2484
2485 auto GuardReg = getTargetMachine().Options.StackProtectorGuardReg;
2486 if (GuardReg == "fs")
2487 AddressSpace = X86AS::FS;
2488 else if (GuardReg == "gs")
2489 AddressSpace = X86AS::GS;
2490 return SegmentOffset(IRB, Offset, AddressSpace);
2491 }
2492 }
2493 return TargetLowering::getIRStackGuard(IRB);
2494}
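
For reference, the slot selected above on a 64-bit glibc target is the canary field of tcbhead_t at %fs:0x28 (%gs:0x14 on i386). A hedged user-space sketch that reads the same slot with inline assembly, assuming an x86-64 Linux/glibc target:

#include <cstdint>

// Sketch only: reads the glibc stack-guard slot that getIRStackGuard()
// addresses through address space 257 (%fs) at offset 0x28 on x86-64.
inline uintptr_t readStackGuardX8664() {
  uintptr_t Guard;
  asm volatile("movq %%fs:0x28, %0" : "=r"(Guard));
  return Guard;
}
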
2495
2496void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2497 // MSVC CRT provides functionalities for stack protection.
2498 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2499 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2500 // MSVC CRT has a global variable holding security cookie.
2501 M.getOrInsertGlobal("__security_cookie",
2502 Type::getInt8PtrTy(M.getContext()));
2503
2504 // MSVC CRT has a function to validate security cookie.
2505 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2506 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2507 Type::getInt8PtrTy(M.getContext()));
2508 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2509 F->setCallingConv(CallingConv::X86_FastCall);
2510 F->addAttribute(1, Attribute::AttrKind::InReg);
2511 }
2512 return;
2513 }
2514
2515 auto GuardMode = getTargetMachine().Options.StackProtectorGuard;
2516
2517 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2518 if ((GuardMode == llvm::StackProtectorGuards::TLS ||
2519 GuardMode == llvm::StackProtectorGuards::None)
2520 && hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2521 return;
2522 TargetLowering::insertSSPDeclarations(M);
2523}
2524
2525Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2526 // MSVC CRT has a global variable holding security cookie.
2527 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2528 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2529 return M.getGlobalVariable("__security_cookie");
2530 }
2531 return TargetLowering::getSDagStackGuard(M);
2532}
2533
2534Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2535 // MSVC CRT has a function to validate security cookie.
2536 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2537 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2538 return M.getFunction("__security_check_cookie");
2539 }
2540 return TargetLowering::getSSPStackGuardCheck(M);
2541}
2542
2543Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2544 if (Subtarget.getTargetTriple().isOSContiki())
2545 return getDefaultSafeStackPointerLocation(IRB, false);
2546
2547 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2548 // definition of TLS_SLOT_SAFESTACK in
2549 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2550 if (Subtarget.isTargetAndroid()) {
2551    // %fs:0x48, unless we're using a Kernel code model, in which case it's
2552    // %gs:0x48. On i386 it is %gs:0x24.
2553 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2554 return SegmentOffset(IRB, Offset, getAddressSpace());
2555 }
2556
2557 // Fuchsia is similar.
2558 if (Subtarget.isTargetFuchsia()) {
2559 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2560 return SegmentOffset(IRB, 0x18, getAddressSpace());
2561 }
2562
2563 return TargetLowering::getSafeStackPointerLocation(IRB);
2564}
2565
2566//===----------------------------------------------------------------------===//
2567// Return Value Calling Convention Implementation
2568//===----------------------------------------------------------------------===//
2569
2570bool X86TargetLowering::CanLowerReturn(
2571 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2572 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2573 SmallVector<CCValAssign, 16> RVLocs;
2574 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2575 return CCInfo.CheckReturn(Outs, RetCC_X86);
2576}
2577
2578const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2579 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2580 return ScratchRegs;
2581}
2582
2583/// Lowers mask values (v*i1) to the local register values
2584/// \returns DAG node after lowering to register type
2585static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2586 const SDLoc &Dl, SelectionDAG &DAG) {
2587 EVT ValVT = ValArg.getValueType();
2588
2589 if (ValVT == MVT::v1i1)
2590 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2591 DAG.getIntPtrConstant(0, Dl));
2592
2593 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2594 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2595 // Two stage lowering might be required
2596 // bitcast: v8i1 -> i8 / v16i1 -> i16
2597 // anyextend: i8 -> i32 / i16 -> i32
2598 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2599 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2600 if (ValLoc == MVT::i32)
2601 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2602 return ValToCopy;
2603 }
2604
2605 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2606 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2607 // One stage lowering is required
2608 // bitcast: v32i1 -> i32 / v64i1 -> i64
2609 return DAG.getBitcast(ValLoc, ValArg);
2610 }
2611
2612 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2613}
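
As a scalar analogue of the two-stage case above (bitcast v8i1 -> i8, then any-extend i8 -> i32), here is a hedged sketch that assumes the mask bits are already packed into an integer; the helper name is invented for illustration:

#include <cstdint>

// Sketch only: stage 1 corresponds to the bitcast (the 8 mask bits already
// occupy an i8), stage 2 to the extension into the i32 location. ANY_EXTEND
// leaves the high bits unspecified; zero-extension is one legal choice.
inline uint32_t passV8i1MaskInI32(uint8_t PackedMask) {
  uint8_t AsI8 = PackedMask; // bitcast v8i1 -> i8
  return AsI8;               // extend i8 -> i32
}
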
2614
2615/// Breaks a v64i1 value into two registers and adds the new node to the DAG
2616static void Passv64i1ArgInRegs(
2617 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2618 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
2619 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2620  assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
2621  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2622  assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
2623  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2624         "The value should reside in two registers");
2625
2626 // Before splitting the value we cast it to i64
2627 Arg = DAG.getBitcast(MVT::i64, Arg);
2628
2629 // Splitting the value into two i32 types
2630 SDValue Lo, Hi;
2631 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2632 DAG.getConstant(0, Dl, MVT::i32));
2633 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2634 DAG.getConstant(1, Dl, MVT::i32));
2635
2636 // Attach the two i32 types into corresponding registers
2637 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2638 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2639}
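
The EXTRACT_ELEMENT pair above is the usual lo/hi split of a 64-bit value on a 32-bit target: element 0 is the low half, element 1 the high half. A plain-integer sketch of the same split (names are illustrative only):

#include <cstdint>
#include <utility>

// Sketch only: mirrors the EXTRACT_ELEMENT indices used in
// Passv64i1ArgInRegs (0 = low 32 bits, 1 = high 32 bits).
inline std::pair<uint32_t, uint32_t> splitI64(uint64_t Arg) {
  uint32_t Lo = static_cast<uint32_t>(Arg & 0xffffffffu);
  uint32_t Hi = static_cast<uint32_t>(Arg >> 32);
  return {Lo, Hi};
}
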
2640
2641SDValue
2642X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2643 bool isVarArg,
2644 const SmallVectorImpl<ISD::OutputArg> &Outs,
2645 const SmallVectorImpl<SDValue> &OutVals,
2646 const SDLoc &dl, SelectionDAG &DAG) const {
2647 MachineFunction &MF = DAG.getMachineFunction();
2648 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2649
2650 // In some cases we need to disable registers from the default CSR list.
2651 // For example, when they are used for argument passing.
2652 bool ShouldDisableCalleeSavedRegister =
2653 CallConv == CallingConv::X86_RegCall ||
2654 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2655
2656 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2657 report_fatal_error("X86 interrupts may not return any value");
2658
2659 SmallVector<CCValAssign, 16> RVLocs;
2660 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2661 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2662
2663 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
2664 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2665 ++I, ++OutsIndex) {
2666 CCValAssign &VA = RVLocs[I];
2667    assert(VA.isRegLoc() && "Can only return in registers!");
2668
2669 // Add the register to the CalleeSaveDisableRegs list.
2670 if (ShouldDisableCalleeSavedRegister)
2671 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2672
2673 SDValue ValToCopy = OutVals[OutsIndex];
2674 EVT ValVT = ValToCopy.getValueType();
2675
2676 // Promote values to the appropriate types.
2677 if (VA.getLocInfo() == CCValAssign::SExt)
2678 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2679 else if (VA.getLocInfo() == CCValAssign::ZExt)
2680 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2681 else if (VA.getLocInfo() == CCValAssign::AExt) {
2682 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2683 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2684 else
2685 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2686 }
2687 else if (VA.getLocInfo() == CCValAssign::BCvt)
2688 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2689
2690    assert(VA.getLocInfo() != CCValAssign::FPExt &&
2691           "Unexpected FP-extend for return value.");
2692
2693 // Report an error if we have attempted to return a value via an XMM
2694 // register and SSE was disabled.
2695 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
2696 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2697 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2698 } else if (!Subtarget.hasSSE2() &&
2699 X86::FR64XRegClass.contains(VA.getLocReg()) &&
2700 ValVT == MVT::f64) {
2701 // When returning a double via an XMM register, report an error if SSE2 is
2702 // not enabled.
2703 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2704 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2705 }
2706
2707 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2708 // the RET instruction and handled by the FP Stackifier.
2709 if (VA.getLocReg() == X86::FP0 ||
2710 VA.getLocReg() == X86::FP1) {
2711 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2712 // change the value to the FP stack register class.
2713 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2714 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2715 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2716 // Don't emit a copytoreg.
2717 continue;
2718 }
2719
2720 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2721 // which is returned in RAX / RDX.
2722 if (Subtarget.is64Bit()) {
2723 if (ValVT == MVT::x86mmx) {
2724 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2725 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2726 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2727 ValToCopy);
2728 // If we don't have SSE2 available, convert to v4f32 so the generated
2729 // register is legal.
2730 if (!Subtarget.hasSSE2())
2731 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2732 }
2733 }
2734 }
2735
2736 if (VA.needsCustom()) {
2737      assert(VA.getValVT() == MVT::v64i1 &&
2738             "Currently the only custom case is when we split v64i1 to 2 regs");
2739
2740 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
2741 Subtarget);
2742
2743 // Add the second register to the CalleeSaveDisableRegs list.
2744 if (ShouldDisableCalleeSavedRegister)
2745 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2746 } else {
2747 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2748 }
2749 }
2750
2751 SDValue Flag;
2752 SmallVector<SDValue, 6> RetOps;
2753 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2754 // Operand #1 = Bytes To Pop
2755 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2756 MVT::i32));
2757
2758 // Copy the result values into the output registers.
2759 for (auto &RetVal : RetVals) {
2760 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
2761 RetOps.push_back(RetVal.second);
2762 continue; // Don't emit a copytoreg.
2763 }
2764
2765 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
2766 Flag = Chain.getValue(1);
2767 RetOps.push_back(
2768 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
2769 }
2770
2771  // The Swift calling convention does not require us to copy the sret argument
2772  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2773
2774 // All x86 ABIs require that for returning structs by value we copy
2775 // the sret argument into %rax/%eax (depending on ABI) for the return.
2776 // We saved the argument into a virtual register in the entry block,
2777 // so now we copy the value out and into %rax/%eax.
2778 //
2779 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2780 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2781 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2782 // either case FuncInfo->setSRetReturnReg() will have been called.
2783 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
2784 // When we have both sret and another return value, we should use the
2785 // original Chain stored in RetOps[0], instead of the current Chain updated
2786    // in the above loop. If we only have sret, RetOps[0] equals Chain.
2787
2788 // For the case of sret and another return value, we have
2789 // Chain_0 at the function entry
2790 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2791 // If we use Chain_1 in getCopyFromReg, we will have
2792 // Val = getCopyFromReg(Chain_1)
2793 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2794
2795 // getCopyToReg(Chain_0) will be glued together with
2796 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2797 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2798 // Data dependency from Unit B to Unit A due to usage of Val in
2799 // getCopyToReg(Chain_1, Val)
2800 // Chain dependency from Unit A to Unit B
2801
2802 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2803 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2804 getPointerTy(MF.getDataLayout()));
2805
2806 Register RetValReg
2807 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2808 X86::RAX : X86::EAX;
2809 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2810 Flag = Chain.getValue(1);
2811
2812 // RAX/EAX now acts like a return value.
2813 RetOps.push_back(
2814 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2815
2816 // Add the returned register to the CalleeSaveDisableRegs list.
2817 if (ShouldDisableCalleeSavedRegister)
2818 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2819 }
2820
2821 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2822 const MCPhysReg *I =
2823 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2824 if (I) {
2825 for (; *I; ++I) {
2826 if (X86::GR64RegClass.contains(*I))
2827 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2828 else
2829        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2830 }
2831 }
2832
2833 RetOps[0] = Chain; // Update chain.
2834
2835 // Add the flag if we have it.
2836 if (Flag.getNode())
2837 RetOps.push_back(Flag);
2838
2839 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2840 if (CallConv == CallingConv::X86_INTR)
2841 opcode = X86ISD::IRET;
2842 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2843}
2844
2845bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2846 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2847 return false;
2848
2849 SDValue TCChain = Chain;
2850 SDNode *Copy = *N->use_begin();
2851 if (Copy->getOpcode() == ISD::CopyToReg) {
2852 // If the copy has a glue operand, we conservatively assume it isn't safe to
2853 // perform a tail call.
2854 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2855 return false;
2856 TCChain = Copy->getOperand(0);
2857 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2858 return false;
2859
2860 bool HasRet = false;
2861 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2862 UI != UE; ++UI) {
2863 if (UI->getOpcode() != X86ISD::RET_FLAG)
2864 return false;
2865    // If we are returning more than one value, we can definitely
2866    // not make a tail call; see PR19530.
2867 if (UI->getNumOperands() > 4)
2868 return false;
2869 if (UI->getNumOperands() == 4 &&
2870 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2871 return false;
2872 HasRet = true;
2873 }
2874
2875 if (!HasRet)
2876 return false;
2877
2878 Chain = TCChain;
2879 return true;
2880}
2881
2882EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2883 ISD::NodeType ExtendKind) const {
2884 MVT ReturnMVT = MVT::i32;
2885
2886 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2887 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2888 // The ABI does not require i1, i8 or i16 to be extended.
2889 //
2890 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2891 // always extending i8/i16 return values, so keep doing that for now.
2892 // (PR26665).
2893 ReturnMVT = MVT::i8;
2894 }
2895
2896 EVT MinVT = getRegisterType(Context, ReturnMVT);
2897 return VT.bitsLT(MinVT) ? MinVT : VT;
2898}
2899
2900/// Reads two 32 bit registers and creates a 64 bit mask value.
2901/// \param VA The current 32 bit value that needs to be assigned.
2902/// \param NextVA The next 32 bit value that needs to be assigned.
2903/// \param Root The parent DAG node.
2904/// \param [in,out] InFlag Represents the SDValue in the parent DAG node used
2905///                        for glue purposes. In case the DAG is already using
2906///                        a physical register instead of a virtual one, we
2907///                        should glue our new SDValue to the InFlag SDValue.
2908/// \return a new SDValue of size 64 bits.
2909static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2910 SDValue &Root, SelectionDAG &DAG,
2911 const SDLoc &Dl, const X86Subtarget &Subtarget,
2912 SDValue *InFlag = nullptr) {
2913  assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
2914  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2915  assert(VA.getValVT() == MVT::v64i1 &&
2916         "Expecting first location of 64 bit width type");
2917  assert(NextVA.getValVT() == VA.getValVT() &&
2918         "The locations should have the same type");
2919  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2920         "The values should reside in two registers");
2921
2922 SDValue Lo, Hi;
2923 SDValue ArgValueLo, ArgValueHi;
2924
2925 MachineFunction &MF = DAG.getMachineFunction();
2926 const TargetRegisterClass *RC = &X86::GR32RegClass;
2927
2928 // Read a 32 bit value from the registers.
2929 if (nullptr == InFlag) {
2930 // When no physical register is present,
2931 // create an intermediate virtual register.
2932 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
2933 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2934 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2935 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2936 } else {
2937 // When a physical register is available read the value from it and glue
2938 // the reads together.
2939 ArgValueLo =
2940 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2941 *InFlag = ArgValueLo.getValue(2);
2942 ArgValueHi =
2943 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2944 *InFlag = ArgValueHi.getValue(2);
2945 }
2946
2947 // Convert the i32 type into v32i1 type.
2948 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2949
2950 // Convert the i32 type into v32i1 type.
2951 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2952
2953 // Concatenate the two values together.
2954 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2955}
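
The CONCAT_VECTORS at the end is the inverse of the split performed by Passv64i1ArgInRegs: the first v32i1 half supplies mask bits 0-31 and the second half bits 32-63. A plain-integer sketch of that reassembly (illustrative names only):

#include <cstdint>

// Sketch only: rebuilds the 64-bit mask from the two 32-bit register halves
// read above, mirroring the CONCAT_VECTORS of the two v32i1 values.
inline uint64_t joinI64(uint32_t Lo, uint32_t Hi) {
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}
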
2956
2957/// The function will lower a register of various sizes (8/16/32/64)
2958/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
2959/// \returns a DAG node containing the operand after lowering to mask type.
2960static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2961 const EVT &ValLoc, const SDLoc &Dl,
2962 SelectionDAG &DAG) {
2963 SDValue ValReturned = ValArg;
2964
2965 if (ValVT == MVT::v1i1)
2966 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2967
2968 if (ValVT == MVT::v64i1) {
2969    // On 32-bit machines this case is handled by getv64i1Argument.
2970    assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
2971    // On 64-bit machines there is no need to truncate the value, only bitcast it.
2972 } else {
2973 MVT maskLen;
2974 switch (ValVT.getSimpleVT().SimpleTy) {
2975 case MVT::v8i1:
2976 maskLen = MVT::i8;
2977 break;
2978 case MVT::v16i1:
2979 maskLen = MVT::i16;
2980 break;
2981 case MVT::v32i1:
2982 maskLen = MVT::i32;
2983 break;
2984 default:
2985      llvm_unreachable("Expecting a vector of i1 types");
2986 }
2987
2988 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2989 }
2990 return DAG.getBitcast(ValVT, ValReturned);
2991}
2992
2993/// Lower the result values of a call into the
2994/// appropriate copies out of appropriate physical registers.
2995///
2996SDValue X86TargetLowering::LowerCallResult(
2997 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2998 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2999 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3000 uint32_t *RegMask) const {
3001
3002 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3003 // Assign locations to each value returned by this call.
3004 SmallVector<CCValAssign, 16> RVLocs;
3005 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3006 *DAG.getContext());
3007 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3008
3009 // Copy all of the result registers out of their specified physreg.
3010 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3011 ++I, ++InsIndex) {
3012 CCValAssign &VA = RVLocs[I];
3013 EVT CopyVT = VA.getLocVT();
3014
3015 // In some calling conventions we need to remove the used registers
3016 // from the register mask.
3017 if (RegMask) {
3018 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3019 SubRegs.isValid(); ++SubRegs)
3020 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3021 }
3022
3023 // Report an error if there was an attempt to return FP values via XMM
3024 // registers.
3025 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3026 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3027 if (VA.getLocReg() == X86::XMM1)
3028 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3029 else
3030 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3031 } else if (!Subtarget.hasSSE2() &&
3032 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3033 CopyVT == MVT::f64) {
3034 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3035 if (VA.getLocReg() == X86::XMM1)
3036 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3037 else
3038 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3039 }
3040
3041 // If we prefer to use the value in xmm registers, copy it out as f80 and
3042 // use a truncate to move it from fp stack reg to xmm reg.
3043 bool RoundAfterCopy = false;
3044 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3045 isScalarFPTypeInSSEReg(VA.getValVT())) {
3046 if (!Subtarget.hasX87())
3047 report_fatal_error("X87 register return with X87 disabled");
3048 CopyVT = MVT::f80;
3049 RoundAfterCopy = (CopyVT != VA.getLocVT());
3050 }
3051
3052 SDValue Val;
3053 if (VA.needsCustom()) {
3054      assert(VA.getValVT() == MVT::v64i1 &&
3055             "Currently the only custom case is when we split v64i1 to 2 regs");
3056 Val =
3057 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3058 } else {
3059 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3060 .getValue(1);
3061 Val = Chain.getValue(0);
3062 InFlag = Chain.getValue(2);
3063 }
3064
3065 if (RoundAfterCopy)
3066 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3067 // This truncation won't change the value.
3068 DAG.getIntPtrConstant(1, dl));
3069
3070 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
3071 if (VA.getValVT().isVector() &&
3072 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3073 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3074 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3075 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3076 } else
3077 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3078 }
3079
3080 if (VA.getLocInfo() == CCValAssign::BCvt)
3081 Val = DAG.getBitcast(VA.getValVT(), Val);
3082
3083 InVals.push_back(Val);
3084 }
3085
3086 return Chain;
3087}
3088
3089//===----------------------------------------------------------------------===//
3090// C & StdCall & Fast Calling Convention implementation
3091//===----------------------------------------------------------------------===//
3092// The StdCall calling convention is the standard for many Windows API
3093// routines. It differs from the C calling convention only slightly: the
3094// callee cleans up the stack rather than the caller, and symbols are also
3095// decorated in some fancy way :) It doesn't support any vector arguments.
3096// For info on the fast calling convention see the Fast Calling Convention
3097// (tail call) implementation, LowerX86_32FastCCCallTo.
3098
3099/// CallIsStructReturn - Determines whether a call uses struct return
3100/// semantics.
3101enum StructReturnType {
3102 NotStructReturn,
3103 RegStructReturn,
3104 StackStructReturn
3105};
3106static StructReturnType
3107callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
3108 if (Outs.empty())
3109 return NotStructReturn;
3110
3111 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
3112 if (!Flags.isSRet())
3113 return NotStructReturn;
3114 if (Flags.isInReg() || IsMCU)
3115 return RegStructReturn;
3116 return StackStructReturn;
3117}
3118
3119/// Determines whether a function uses struct return semantics.
3120static StructReturnType
3121argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
3122 if (Ins.empty())
3123 return NotStructReturn;
3124
3125 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
3126 if (!Flags.isSRet())
3127 return NotStructReturn;
3128 if (Flags.isInReg() || IsMCU)
3129 return RegStructReturn;
3130 return StackStructReturn;
3131}
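
For context, sret semantics mean the caller passes a hidden pointer to storage for the returned aggregate; the classification above decides whether that pointer travels in a register or on the stack. A hedged sketch of what a struct-returning signature effectively lowers to (the names are invented for illustration):

// Sketch only: an aggregate too large for register return.
struct BigResult {
  long A, B, C, D;
};

// Source-level signature:
//   BigResult makeBigResult();
// Effective signature with struct-return semantics:
//   void makeBigResult(BigResult *Sret); // hidden first argument
void makeBigResultLowered(BigResult *Sret) {
  *Sret = BigResult{1, 2, 3, 4};
}
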
3132
3133/// Make a copy of an aggregate at address specified by "Src" to address
3134/// "Dst" with size and alignment information specified by the specific
3135/// parameter attribute. The copy will be passed as a byval function parameter.
3136static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3137 SDValue Chain, ISD::ArgFlagsTy Flags,
3138 SelectionDAG &DAG, const SDLoc &dl) {
3139 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3140
3141 return DAG.getMemcpy(
3142 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3143 /*isVolatile*/ false, /*AlwaysInline=*/true,
3144 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3145}
3146
3147/// Return true if the calling convention is one that we can guarantee TCO for.
3148static bool canGuaranteeTCO(CallingConv::ID CC) {
3149 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3150 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3151 CC == CallingConv::HHVM || CC == CallingConv::Tail);
3152}
3153
3154/// Return true if we might ever do TCO for calls with this calling convention.
3155static bool mayTailCallThisCC(CallingConv::ID CC) {
3156 switch (CC) {
3157 // C calling conventions:
3158 case CallingConv::C:
3159 case CallingConv::Win64:
3160 case CallingConv::X86_64_SysV:
3161 // Callee pop conventions:
3162 case CallingConv::X86_ThisCall:
3163 case CallingConv::X86_StdCall:
3164 case CallingConv::X86_VectorCall:
3165 case CallingConv::X86_FastCall:
3166 // Swift:
3167 case CallingConv::Swift:
3168 return true;
3169 default:
3170 return canGuaranteeTCO(CC);
3171 }
3172}
3173
3174/// Return true if the function is being made into a tailcall target by
3175/// changing its ABI.
3176static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3177 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
3178}
3179
3180bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3181 if (!CI->isTailCall())
3182 return false;
3183
3184 CallingConv::ID CalleeCC = CI->getCallingConv();
3185 if (!mayTailCallThisCC(CalleeCC))
3186 return false;
3187
3188 return true;
3189}
3190
3191SDValue
3192X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3193 const SmallVectorImpl<ISD::InputArg> &Ins,
3194 const SDLoc &dl, SelectionDAG &DAG,
3195 const CCValAssign &VA,
3196 MachineFrameInfo &MFI, unsigned i) const {
3197 // Create the nodes corresponding to a load from this parameter slot.
3198 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3199 bool AlwaysUseMutable = shouldGuaranteeTCO(
3200 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3201 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3202 EVT ValVT;
3203 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3204
3205  // If the value is passed by pointer, we have the address passed instead of
3206  // the value itself. No need to extend if the mask value and the location
3207  // share the same absolute size.
3208 bool ExtendedInMem =
3209 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3210 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3211
3212 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3213 ValVT = VA.getLocVT();
3214 else
3215 ValVT = VA.getValVT();
3216
3217  // FIXME: For now, all byval parameter objects are marked mutable. This can
3218  // be changed with more analysis.
3219  // In case of tail call optimization, mark all arguments mutable, since they
3220  // could be overwritten by the lowering of the arguments of a tail call.
3221 if (Flags.isByVal()) {
3222 unsigned Bytes = Flags.getByValSize();
3223 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3224
3225 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3226 // can be improved with deeper analysis.
3227 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3228 /*isAliased=*/true);
3229 return DAG.getFrameIndex(FI, PtrVT);
3230 }
3231
3232 EVT ArgVT = Ins[i].ArgVT;
3233
3234 // If this is a vector that has been split into multiple parts, and the
3235  // scalar size of the parts doesn't match the vector element size, then we can't
3236 // elide the copy. The parts will have padding between them instead of being
3237 // packed like a vector.
3238 bool ScalarizedAndExtendedVector =
3239 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3240 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3241
3242 // This is an argument in memory. We might be able to perform copy elision.
3243 // If the argument is passed directly in memory without any extension, then we
3244 // can perform copy elision. Large vector types, for example, may be passed
3245 // indirectly by pointer.
3246 if (Flags.isCopyElisionCandidate() &&
3247 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3248 !ScalarizedAndExtendedVector) {
3249 SDValue PartAddr;
3250 if (Ins[i].PartOffset == 0) {
3251 // If this is a one-part value or the first part of a multi-part value,
3252 // create a stack object for the entire argument value type and return a
3253 // load from our portion of it. This assumes that if the first part of an
3254 // argument is in memory, the rest will also be in memory.
3255 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3256 /*IsImmutable=*/false);
3257 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3258 return DAG.getLoad(
3259 ValVT, dl, Chain, PartAddr,
3260 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3261 } else {
3262 // This is not the first piece of an argument in memory. See if there is
3263 // already a fixed stack object including this offset. If so, assume it
3264 // was created by the PartOffset == 0 branch above and create a load from
3265 // the appropriate offset into it.
3266 int64_t PartBegin = VA.getLocMemOffset();
3267 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3268 int FI = MFI.getObjectIndexBegin();
3269 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3270 int64_t ObjBegin = MFI.getObjectOffset(FI);
3271 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3272 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3273 break;
3274 }
3275 if (MFI.isFixedObjectIndex(FI)) {
3276 SDValue Addr =
3277 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3278 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3279 return DAG.getLoad(
3280 ValVT, dl, Chain, Addr,
3281 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3282 Ins[i].PartOffset));
3283 }
3284 }
3285 }
3286
3287 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3288 VA.getLocMemOffset(), isImmutable);
3289
3290 // Set SExt or ZExt flag.
3291 if (VA.getLocInfo() == CCValAssign::ZExt) {
3292 MFI.setObjectZExt(FI, true);
3293 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3294 MFI.setObjectSExt(FI, true);
3295 }
3296
3297 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3298 SDValue Val = DAG.getLoad(
3299 ValVT, dl, Chain, FIN,
3300 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3301 return ExtendedInMem
3302 ? (VA.getValVT().isVector()
3303 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3304 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3305 : Val;
3306}
3307
3308// FIXME: Get this from tablegen.
3309static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3310 const X86Subtarget &Subtarget) {
3311  assert(Subtarget.is64Bit());
3312
3313 if (Subtarget.isCallingConvWin64(CallConv)) {
3314 static const MCPhysReg GPR64ArgRegsWin64[] = {
3315 X86::RCX, X86::RDX, X86::R8, X86::R9
3316 };
3317 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3318 }
3319
3320 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3321 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3322 };
3323 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3324}
3325
3326// FIXME: Get this from tablegen.
3327static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3328 CallingConv::ID CallConv,
3329 const X86Subtarget &Subtarget) {
3330  assert(Subtarget.is64Bit());
3331 if (Subtarget.isCallingConvWin64(CallConv)) {
3332 // The XMM registers which might contain var arg parameters are shadowed
3333 // in their paired GPR. So we only need to save the GPR to their home
3334 // slots.
3335 // TODO: __vectorcall will change this.
3336 return None;
3337 }
3338
3339 const Function &F = MF.getFunction();
3340 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3341 bool isSoftFloat = Subtarget.useSoftFloat();
3342  assert(!(isSoftFloat && NoImplicitFloatOps) &&
3343         "SSE register cannot be used when SSE is disabled!");
3344 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3345 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3346 // registers.
3347 return None;
3348
3349 static const MCPhysReg XMMArgRegs64Bit[] = {
3350 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3351 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3352 };
3353 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3354}
3355
3356#ifndef NDEBUG
3357static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3358 return llvm::is_sorted(
3359 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3360 return A.getValNo() < B.getValNo();
3361 });
3362}
3363#endif
3364
3365namespace {
3366/// This is a helper class for lowering variable-argument (varargs) parameters.
3367class VarArgsLoweringHelper {
3368public:
3369 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3370 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3371 CallingConv::ID CallConv, CCState &CCInfo)
3372 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3373 TheMachineFunction(DAG.getMachineFunction()),
3374 TheFunction(TheMachineFunction.getFunction()),
3375 FrameInfo(TheMachineFunction.getFrameInfo()),
3376 FrameLowering(*Subtarget.getFrameLowering()),
3377 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3378 CCInfo(CCInfo) {}
3379
3380  // Lower variable-argument (varargs) parameters.
3381 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3382
3383private:
3384 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3385
3386 void forwardMustTailParameters(SDValue &Chain);
3387
3388 bool is64Bit() const { return Subtarget.is64Bit(); }
3389 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3390
3391 X86MachineFunctionInfo *FuncInfo;
3392 const SDLoc &DL;
3393 SelectionDAG &DAG;
3394 const X86Subtarget &Subtarget;
3395 MachineFunction &TheMachineFunction;
3396 const Function &TheFunction;
3397 MachineFrameInfo &FrameInfo;
3398 const TargetFrameLowering &FrameLowering;
3399 const TargetLowering &TargLowering;
3400 CallingConv::ID CallConv;
3401 CCState &CCInfo;
3402};
3403} // namespace
3404
3405void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3406 SDValue &Chain, unsigned StackSize) {
3407  // If the function takes a variable number of arguments, make a frame index
3408  // for the start of the first vararg value... for expansion of llvm.va_start.
3409  // We can skip this if there are no va_start calls.
3410 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3411 CallConv != CallingConv::X86_ThisCall)) {
3412 FuncInfo->setVarArgsFrameIndex(
3413 FrameInfo.CreateFixedObject(1, StackSize, true));
3414 }
3415
3416 // Figure out if XMM registers are in use.
3417  assert(!(Subtarget.useSoftFloat() &&
3418           TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) &&
3419         "SSE register cannot be used when SSE is disabled!");
3420
3421 // 64-bit calling conventions support varargs and register parameters, so we
3422 // have to do extra work to spill them in the prologue.
3423 if (is64Bit()) {
3424 // Find the first unallocated argument registers.
3425 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3426 ArrayRef<MCPhysReg> ArgXMMs =
3427 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3428 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3429 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3430
3431    assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3432           "SSE register cannot be used when SSE is disabled!");
3433
3434 if (isWin64()) {
3435 // Get to the caller-allocated home save location. Add 8 to account
3436 // for the return address.
3437 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3438 FuncInfo->setRegSaveFrameIndex(
3439 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3440 // Fixup to set vararg frame on shadow area (4 x i64).
3441 if (NumIntRegs < 4)
3442 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3443 } else {
3444 // For X86-64, if there are vararg parameters that are passed via
3445 // registers, then we must store them to their spots on the stack so
3446 // they may be loaded by dereferencing the result of va_next.
3447 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3448 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3449 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3450 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3451 }
3452
3453 SmallVector<SDValue, 6>
3454 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3455 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3456 // keeping live input value
3457 SDValue ALVal; // if applicable keeps SDValue for %al register
3458
3459 // Gather all the live in physical registers.
3460 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3461 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3462 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3463 }
3464 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3465 if (!AvailableXmms.empty()) {
3466 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3467 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3468 for (MCPhysReg Reg : AvailableXmms) {
3469 Register XMMReg = TheMachineFunction.addLiveIn(Reg, &X86::VR128RegClass);
3470 LiveXMMRegs.push_back(
3471 DAG.getCopyFromReg(Chain, DL, XMMReg, MVT::v4f32));
3472 }
3473 }
3474
3475 // Store the integer parameter registers.
3476 SmallVector<SDValue, 8> MemOps;
3477 SDValue RSFIN =
3478 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3479 TargLowering.getPointerTy(DAG.getDataLayout()));
3480 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3481 for (SDValue Val : LiveGPRs) {
3482 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3483 TargLowering.getPointerTy(DAG.getDataLayout()),
3484 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3485 SDValue Store =
3486 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3487 MachinePointerInfo::getFixedStack(
3488 DAG.getMachineFunction(),
3489 FuncInfo->getRegSaveFrameIndex(), Offset));
3490 MemOps.push_back(Store);
3491 Offset += 8;
3492 }
3493
3494 // Now store the XMM (fp + vector) parameter registers.
3495 if (!LiveXMMRegs.empty()) {
3496 SmallVector<SDValue, 12> SaveXMMOps;
3497 SaveXMMOps.push_back(Chain);
3498 SaveXMMOps.push_back(ALVal);
3499 SaveXMMOps.push_back(
3500 DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
3501 SaveXMMOps.push_back(
3502 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
3503 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3504 LiveXMMRegs.end());
3505 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
3506 MVT::Other, SaveXMMOps));
3507 }
3508
3509 if (!MemOps.empty())
3510 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3511 }
3512}
3513
3514void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3515 // Find the largest legal vector type.
3516 MVT VecVT = MVT::Other;
3517 // FIXME: Only some x86_32 calling conventions support AVX512.
3518 if (Subtarget.useAVX512Regs() &&
3519 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3520 CallConv == CallingConv::Intel_OCL_BI)))
3521 VecVT = MVT::v16f32;
3522 else if (Subtarget.hasAVX())
3523 VecVT = MVT::v8f32;
3524 else if (Subtarget.hasSSE2())
3525 VecVT = MVT::v4f32;
3526
3527 // We forward some GPRs and some vector types.
3528 SmallVector<MVT, 2> RegParmTypes;
3529 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3530 RegParmTypes.push_back(IntVT);
3531 if (VecVT != MVT::Other)
3532 RegParmTypes.push_back(VecVT);
3533
3534 // Compute the set of forwarded registers. The rest are scratch.
3535 SmallVectorImpl<ForwardedRegister> &Forwards =
3536 FuncInfo->getForwardedMustTailRegParms();
3537 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3538
3539 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3540 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3541 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3542 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3543 }
3544
3545 // Copy all forwards from physical to virtual registers.
3546 for (ForwardedRegister &FR : Forwards) {
3547 // FIXME: Can we use a less constrained schedule?
3548 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3549 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
3550 TargLowering.getRegClassFor(FR.VT));
3551 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
3552 }
3553}
3554
3555void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
3556 unsigned StackSize) {
3557  // Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
3558  // If necessary, it will be set to the correct value later.
3559 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3560 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3561
3562 if (FrameInfo.hasVAStart())
3563 createVarArgAreaAndStoreRegisters(Chain, StackSize);
3564
3565 if (FrameInfo.hasMustTailInVarArgFunc())
3566 forwardMustTailParameters(Chain);
3567}
3568
3569SDValue X86TargetLowering::LowerFormalArguments(
3570 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3571 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3572 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3573 MachineFunction &MF = DAG.getMachineFunction();
3574 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3575
3576 const Function &F = MF.getFunction();
3577 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3578 F.getName() == "main")
3579 FuncInfo->setForceFramePointer(true);
3580
3581 MachineFrameInfo &MFI = MF.getFrameInfo();
3582 bool Is64Bit = Subtarget.is64Bit();
3583 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3584
3585  assert(
3586      !(IsVarArg && canGuaranteeTCO(CallConv)) &&
3587      "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
3588
3589 // Assign locations to all of the incoming arguments.
3590 SmallVector<CCValAssign, 16> ArgLocs;
3591 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3592
3593 // Allocate shadow area for Win64.
3594 if (IsWin64)
3595 CCInfo.AllocateStack(32, Align(8));
3596
3597 CCInfo.AnalyzeArguments(Ins, CC_X86);
3598
3599 // In vectorcall calling convention a second pass is required for the HVA
3600 // types.
3601 if (CallingConv::X86_VectorCall == CallConv) {
3602 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3603 }
3604
3605 // The next loop assumes that the locations are in the same order of the
3606 // input arguments.
3607  assert(isSortedByValueNo(ArgLocs) &&
3608         "Argument Location list must be sorted before lowering");
3609
3610 SDValue ArgValue;
3611 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3612 ++I, ++InsIndex) {
3613    assert(InsIndex < Ins.size() && "Invalid Ins index");
3614 CCValAssign &VA = ArgLocs[I];
3615
3616 if (VA.isRegLoc()) {
3617 EVT RegVT = VA.getLocVT();
3618 if (VA.needsCustom()) {
3619        assert(
3620            VA.getValVT() == MVT::v64i1 &&
3621            "Currently the only custom case is when we split v64i1 to 2 regs");
3622
3623        // In the regcall calling convention on 32-bit targets, v64i1
3624        // values are split up into two registers.
3625 ArgValue =
3626 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3627 } else {
3628 const TargetRegisterClass *RC;
3629 if (RegVT == MVT::i8)
3630 RC = &X86::GR8RegClass;
3631 else if (RegVT == MVT::i16)
3632 RC = &X86::GR16RegClass;
3633 else if (RegVT == MVT::i32)
3634 RC = &X86::GR32RegClass;
3635 else if (Is64Bit && RegVT == MVT::i64)
3636 RC = &X86::GR64RegClass;
3637 else if (RegVT == MVT::f32)
3638 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3639 else if (RegVT == MVT::f64)
3640 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3641 else if (RegVT == MVT::f80)
3642 RC = &X86::RFP80RegClass;
3643 else if (RegVT == MVT::f128)
3644 RC = &X86::VR128RegClass;
3645 else if (RegVT.is512BitVector())
3646 RC = &X86::VR512RegClass;
3647 else if (RegVT.is256BitVector())
3648 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3649 else if (RegVT.is128BitVector())
3650 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3651 else if (RegVT == MVT::x86mmx)
3652 RC = &X86::VR64RegClass;
3653 else if (RegVT == MVT::v1i1)
3654 RC = &X86::VK1RegClass;
3655 else if (RegVT == MVT::v8i1)
3656 RC = &X86::VK8RegClass;
3657 else if (RegVT == MVT::v16i1)
3658 RC = &X86::VK16RegClass;
3659 else if (RegVT == MVT::v32i1)
3660 RC = &X86::VK32RegClass;
3661 else if (RegVT == MVT::v64i1)
3662 RC = &X86::VK64RegClass;
3663 else
3664          llvm_unreachable("Unknown argument type!");
3665
3666 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3667 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3668 }
3669
3670 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3671 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3672 // right size.
3673 if (VA.getLocInfo() == CCValAssign::SExt)
3674 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3675 DAG.getValueType(VA.getValVT()));
3676 else if (VA.getLocInfo() == CCValAssign::ZExt)
3677 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3678 DAG.getValueType(VA.getValVT()));
3679 else if (VA.getLocInfo() == CCValAssign::BCvt)
3680 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3681
3682 if (VA.isExtInLoc()) {
3683 // Handle MMX values passed in XMM regs.
3684 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3685 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3686 else if (VA.getValVT().isVector() &&
3687 VA.getValVT().getScalarType() == MVT::i1 &&
3688 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3689 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3690 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3691 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3692 } else
3693 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3694 }
3695 } else {
3696      assert(VA.isMemLoc());
3697 ArgValue =
3698 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3699 }
3700
3701 // If value is passed via pointer - do a load.
3702 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3703 ArgValue =
3704 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3705
3706 InVals.push_back(ArgValue);
3707 }
3708
3709 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3710 // Swift calling convention does not require we copy the sret argument
3711 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3712 if (CallConv == CallingConv::Swift)
3713 continue;
3714
3715 // All x86 ABIs require that for returning structs by value we copy the
3716 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3717 // the argument into a virtual register so that we can access it from the
3718 // return points.
3719 if (Ins[I].Flags.isSRet()) {
3720 Register Reg = FuncInfo->getSRetReturnReg();
3721 if (!Reg) {
3722 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3723 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3724 FuncInfo->setSRetReturnReg(Reg);
3725 }
3726 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3727 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3728 break;
3729 }
3730 }
3731
3732 unsigned StackSize = CCInfo.getNextStackOffset();
3733 // Align stack specially for tail calls.
3734 if (shouldGuaranteeTCO(CallConv,
3735 MF.getTarget().Options.GuaranteedTailCallOpt))
3736 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3737
3738 if (IsVarArg)
3739 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
3740 .lowerVarArgsParameters(Chain, StackSize);
3741
3742 // Some CCs need callee pop.
3743 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
3744 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3745 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3746 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3747 // X86 interrupts must pop the error code (and the alignment padding) if
3748 // present.
3749 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3750 } else {
3751 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3752 // If this is an sret function, the return should pop the hidden pointer.
3753 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3754 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3755 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3756 FuncInfo->setBytesToPopOnReturn(4);
3757 }
3758
3759 if (!Is64Bit) {
3760 // RegSaveFrameIndex is X86-64 only.
3761 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3762 }
3763
3764 FuncInfo->setArgumentStackSize(StackSize);
3765
3766 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3767 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3768 if (Personality == EHPersonality::CoreCLR) {
3769      assert(Is64Bit);
3770 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3771 // that we'd prefer this slot be allocated towards the bottom of the frame
3772 // (i.e. near the stack pointer after allocating the frame). Every
3773 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3774 // offset from the bottom of this and each funclet's frame must be the
3775 // same, so the size of funclets' (mostly empty) frames is dictated by
3776 // how far this slot is from the bottom (since they allocate just enough
3777 // space to accommodate holding this slot at the correct offset).
3778 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
3779 EHInfo->PSPSymFrameIdx = PSPSymFI;
3780 }
3781 }
3782
3783 if (CallConv == CallingConv::X86_RegCall ||
3784 F.hasFnAttribute("no_caller_saved_registers")) {
3785 MachineRegisterInfo &MRI = MF.getRegInfo();
3786 for (std::pair<Register, Register> Pair : MRI.liveins())
3787 MRI.disableCalleeSavedRegister(Pair.first);
3788 }
3789
3790 return Chain;
3791}
3792
3793SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3794 SDValue Arg, const SDLoc &dl,
3795 SelectionDAG &DAG,
3796 const CCValAssign &VA,
3797 ISD::ArgFlagsTy Flags,
3798 bool isByVal) const {
3799 unsigned LocMemOffset = VA.getLocMemOffset();
3800 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3801 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3802 StackPtr, PtrOff);
3803 if (isByVal)
3804 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3805
3806 return DAG.getStore(
3807 Chain, dl, Arg, PtrOff,
3808 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3809}
3810
3811/// Emit a load of return address if tail call
3812/// optimization is performed and it is required.
3813SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3814 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3815 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3816 // Adjust the Return address stack slot.
3817 EVT VT = getPointerTy(DAG.getDataLayout());
3818 OutRetAddr = getReturnAddressFrameIndex(DAG);
3819
3820 // Load the "old" Return address.
3821 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3822 return SDValue(OutRetAddr.getNode(), 1);
3823}
3824
3825/// Emit a store of the return address if tail call
3826/// optimization is performed and it is required (FPDiff!=0).
3827static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3828 SDValue Chain, SDValue RetAddrFrIdx,
3829 EVT PtrVT, unsigned SlotSize,
3830 int FPDiff, const SDLoc &dl) {
3831 // Store the return address to the appropriate stack slot.
3832 if (!FPDiff) return Chain;
3833 // Calculate the new stack slot for the return address.
3834 int NewReturnAddrFI =
3835 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3836 false);
3837 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3838 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3839 MachinePointerInfo::getFixedStack(
3840 DAG.getMachineFunction(), NewReturnAddrFI));
3841 return Chain;
3842}
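
The offset arithmetic above is easiest to see with concrete numbers. A small sketch using assumed byte counts (FPDiff itself is computed later in LowerCall as NumBytesCallerPushed - NumBytes); none of the values below come from this file.

#include <cstdio>

int main() {
  // Assumed example values, not taken from this file.
  int NumBytesCallerPushed = 16; // argument bytes the caller's frame reserved
  int NumBytes = 32;             // argument bytes the tail-called callee needs
  int SlotSize = 8;              // return-address slot size on x86-64

  int FPDiff = NumBytesCallerPushed - NumBytes; // -16
  int NewRetAddrOffset = FPDiff - SlotSize;     // -24

  // The return address is re-stored in a fixed object at FPDiff - SlotSize,
  // i.e. just above the shifted argument area; FPDiff == 0 means no move.
  std::printf("FPDiff = %d, new return-address offset = %d\n", FPDiff,
              NewRetAddrOffset);
}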
3843
3844/// Returns a vector_shuffle mask for an movs{s|d}, movd
3845/// operation of specified width.
3846static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3847 SDValue V2) {
3848 unsigned NumElems = VT.getVectorNumElements();
3849 SmallVector<int, 8> Mask;
3850 Mask.push_back(NumElems);
3851 for (unsigned i = 1; i != NumElems; ++i)
3852 Mask.push_back(i);
3853 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3854}
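
For a 4-element vector the mask built above is {4, 1, 2, 3}: in the shuffle's combined-operand numbering, indices 0-3 select lanes of V1 and 4-7 select lanes of V2, so lane 0 comes from V2 and the remaining lanes from V1 -- the movss/movsd merge pattern. A stand-alone sketch, with buildMOVLMask as a hypothetical helper that is not part of this file:

#include <cstdio>
#include <vector>

// Rebuilds the same mask as getMOVL, outside of SelectionDAG.
static std::vector<int> buildMOVLMask(unsigned NumElems) {
  std::vector<int> Mask;
  Mask.push_back(NumElems);      // lane 0 taken from the second operand
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back(i);           // remaining lanes kept from the first operand
  return Mask;
}

int main() {
  for (int M : buildMOVLMask(4))
    std::printf("%d ", M);       // prints: 4 1 2 3
  std::printf("\n");
}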
3855
3856SDValue
3857X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3858 SmallVectorImpl<SDValue> &InVals) const {
3859 SelectionDAG &DAG = CLI.DAG;
3860 SDLoc &dl = CLI.DL;
3861 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3862 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3863 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3864 SDValue Chain = CLI.Chain;
3865 SDValue Callee = CLI.Callee;
3866 CallingConv::ID CallConv = CLI.CallConv;
3867 bool &isTailCall = CLI.IsTailCall;
3868 bool isVarArg = CLI.IsVarArg;
3869
3870 MachineFunction &MF = DAG.getMachineFunction();
3871 bool Is64Bit = Subtarget.is64Bit();
3872 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3873 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3874 bool IsSibcall = false;
3875 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
3876 CallConv == CallingConv::Tail;
3877 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3878 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CB);
3879 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3880 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3881 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3882 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CB);
3883 bool HasNoCfCheck =
3884 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3885 bool IsIndirectCall = (CI && CI->isIndirectCall());
3886 const Module *M = MF.getMMI().getModule();
3887 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3888
3889 MachineFunction::CallSiteInfo CSInfo;
3890 if (CallConv == CallingConv::X86_INTR)
3891 report_fatal_error("X86 interrupts may not be called directly");
3892
3893 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
3894 // If we are using a GOT, disable tail calls to external symbols with
3895 // default visibility. Tail calling such a symbol requires using a GOT
3896 // relocation, which forces early binding of the symbol. This breaks code
3897    // that requires lazy function symbol resolution. Using musttail or
3898 // GuaranteedTailCallOpt will override this.
3899 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3900 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3901 G->getGlobal()->hasDefaultVisibility()))
3902 isTailCall = false;
3903 }
3904
3905 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
3906 if (IsMustTail) {
3907 // Force this to be a tail call. The verifier rules are enough to ensure
3908 // that we can lower this successfully without moving the return address
3909 // around.
3910 isTailCall = true;
3911 } else if (isTailCall) {
3912 // Check if it's really possible to do a tail call.
3913 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3914 isVarArg, SR != NotStructReturn,
3915 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3916 Outs, OutVals, Ins, DAG);
3917
3918 // Sibcalls are automatically detected tailcalls which do not require
3919 // ABI changes.
3920 if (!IsGuaranteeTCO && isTailCall)
3921 IsSibcall = true;
3922
3923 if (isTailCall)
3924 ++NumTailCalls;
3925 }
3926
3927  assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3928         "Var args not supported with calling convention fastcc, ghc or hipe");
3929
3930 // Analyze operands of the call, assigning locations to each operand.
3931 SmallVector<CCValAssign, 16> ArgLocs;
3932 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3933
3934 // Allocate shadow area for Win64.
3935 if (IsWin64)
3936 CCInfo.AllocateStack(32, Align(8));
3937
3938 CCInfo.AnalyzeArguments(Outs, CC_X86);
3939
3940 // In vectorcall calling convention a second pass is required for the HVA
3941 // types.
3942 if (CallingConv::X86_VectorCall == CallConv) {
3943 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3944 }
3945
3946 // Get a count of how many bytes are to be pushed on the stack.
3947 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3948 if (IsSibcall)
3949    // This is a sibcall. The memory operands are already available in the
3950    // caller's own incoming argument area.
3951 NumBytes = 0;
3952 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
3953 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3954
3955 int FPDiff = 0;
3956 if (isTailCall && !IsSibcall && !IsMustTail) {
3957 // Lower arguments at fp - stackoffset + fpdiff.
3958 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3959
3960 FPDiff = NumBytesCallerPushed - NumBytes;
3961
3962 // Set the delta of movement of the returnaddr stackslot.
3963 // But only set if delta is greater than previous delta.
3964 if (FPDiff < X86Info->getTCReturnAddrDelta())
3965 X86Info->setTCReturnAddrDelta(FPDiff);
3966 }
3967
3968 unsigned NumBytesToPush = NumBytes;
3969 unsigned NumBytesToPop = NumBytes;
3970
3971 // If we have an inalloca argument, all stack space has already been allocated
3972  // for us and is right at the top of the stack. We don't support multiple
3973 // arguments passed in memory when using inalloca.
3974 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3975 NumBytesToPush = 0;
3976 if (!ArgLocs.back().isMemLoc())
3977 report_fatal_error("cannot use inalloca attribute on a register "
3978 "parameter");
3979 if (ArgLocs.back().getLocMemOffset() != 0)
3980 report_fatal_error("any parameter with the inalloca attribute must be "
3981 "the only memory argument");
3982 } else if (CLI.IsPreallocated) {
3983    assert(ArgLocs.back().isMemLoc() &&
3984           "cannot use preallocated attribute on a register "
3985           "parameter");
3986 SmallVector<size_t, 4> PreallocatedOffsets;
3987 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
3988 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
3989 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
3990 }
3991 }
3992 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
3993 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
3994 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
3995 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
3996 NumBytesToPush = 0;
3997 }
3998
3999 if (!IsSibcall && !IsMustTail)
4000 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4001 NumBytes - NumBytesToPush, dl);
4002
4003 SDValue RetAddrFrIdx;
4004 // Load return address for tail calls.
4005 if (isTailCall && FPDiff)
4006 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4007 Is64Bit, FPDiff, dl);
4008
4009 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4010 SmallVector<SDValue, 8> MemOpChains;
4011 SDValue StackPtr;
4012
4013 // The next loop assumes that the locations are in the same order of the
4014 // input arguments.
4015  assert(isSortedByValueNo(ArgLocs) &&
4016         "Argument Location list must be sorted before lowering");
4017
4018 // Walk the register/memloc assignments, inserting copies/loads. In the case
4019  // of tail call optimization, arguments are handled later.
4020 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4021 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4022 ++I, ++OutIndex) {
4023    assert(OutIndex < Outs.size() && "Invalid Out index");
4024 // Skip inalloca/preallocated arguments, they have already been written.
4025 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4026 if (Flags.isInAlloca() || Flags.isPreallocated())
4027 continue;
4028
4029 CCValAssign &VA = ArgLocs[I];
4030 EVT RegVT = VA.getLocVT();
4031 SDValue Arg = OutVals[OutIndex];
4032 bool isByVal = Flags.isByVal();
4033
4034 // Promote the value if needed.
4035 switch (VA.getLocInfo()) {
4036    default: llvm_unreachable("Unknown loc info!");
4037 case CCValAssign::Full: break;
4038 case CCValAssign::SExt:
4039 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4040 break;
4041 case CCValAssign::ZExt:
4042 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4043 break;
4044 case CCValAssign::AExt:
4045 if (Arg.getValueType().isVector() &&
4046 Arg.getValueType().getVectorElementType() == MVT::i1)
4047 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4048 else if (RegVT.is128BitVector()) {
4049 // Special case: passing MMX values in XMM registers.
4050 Arg = DAG.getBitcast(MVT::i64, Arg);
4051 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4052 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4053 } else
4054 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4055 break;
4056 case CCValAssign::BCvt:
4057 Arg = DAG.getBitcast(RegVT, Arg);
4058 break;
4059 case CCValAssign::Indirect: {
4060 if (isByVal) {
4061 // Memcpy the argument to a temporary stack slot to prevent
4062 // the caller from seeing any modifications the callee may make
4063 // as guaranteed by the `byval` attribute.
4064 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4065 Flags.getByValSize(),
4066 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4067 SDValue StackSlot =
4068 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4069 Chain =
4070 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4071 // From now on treat this as a regular pointer
4072 Arg = StackSlot;
4073 isByVal = false;
4074 } else {
4075 // Store the argument.
4076 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4077 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4078 Chain = DAG.getStore(
4079 Chain, dl, Arg, SpillSlot,
4080 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4081 Arg = SpillSlot;
4082 }
4083 break;
4084 }
4085 }
4086
4087 if (VA.needsCustom()) {
4088      assert(VA.getValVT() == MVT::v64i1 &&
4089             "Currently the only custom case is when we split v64i1 to 2 regs");
4090 // Split v64i1 value into two registers
4091 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4092 } else if (VA.isRegLoc()) {
4093 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4094 const TargetOptions &Options = DAG.getTarget().Options;
4095 if (Options.EmitCallSiteInfo)
4096 CSInfo.emplace_back(VA.getLocReg(), I);
4097 if (isVarArg && IsWin64) {
4098 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4099 // shadow reg if callee is a varargs function.
4100 Register ShadowReg;
4101 switch (VA.getLocReg()) {
4102 case X86::XMM0: ShadowReg = X86::RCX; break;
4103 case X86::XMM1: ShadowReg = X86::RDX; break;
4104 case X86::XMM2: ShadowReg = X86::R8; break;
4105 case X86::XMM3: ShadowReg = X86::R9; break;
4106 }
4107 if (ShadowReg)
4108 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4109 }
4110 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4111      assert(VA.isMemLoc());
4112 if (!StackPtr.getNode())
4113 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4114 getPointerTy(DAG.getDataLayout()));
4115 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4116 dl, DAG, VA, Flags, isByVal));
4117 }
4118 }
4119
4120 if (!MemOpChains.empty())
4121 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4122
4123 if (Subtarget.isPICStyleGOT()) {
4124 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4125 // GOT pointer.
4126 if (!isTailCall) {
4127 RegsToPass.push_back(std::make_pair(
4128 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4129 getPointerTy(DAG.getDataLayout()))));
4130 } else {
4131 // If we are tail calling and generating PIC/GOT style code load the
4132 // address of the callee into ECX. The value in ecx is used as target of
4133 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4134 // for tail calls on PIC/GOT architectures. Normally we would just put the
4135 // address of GOT into ebx and then call target@PLT. But for tail calls
4136 // ebx would be restored (since ebx is callee saved) before jumping to the
4137 // target@PLT.
4138
4139 // Note: The actual moving to ECX is done further down.
4140 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4141 if (G && !G->getGlobal()->hasLocalLinkage() &&
4142 G->getGlobal()->hasDefaultVisibility())
4143 Callee = LowerGlobalAddress(Callee, DAG);
4144 else if (isa<ExternalSymbolSDNode>(Callee))
4145 Callee = LowerExternalSymbol(Callee, DAG);
4146 }
4147 }
4148
4149 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
4150 // From AMD64 ABI document:
4151 // For calls that may call functions that use varargs or stdargs
4152 // (prototype-less calls or calls to functions containing ellipsis (...) in
4153 // the declaration) %al is used as hidden argument to specify the number
4154 // of SSE registers used. The contents of %al do not need to match exactly
4155    // the number of registers, but must be an upper bound on the number of SSE
4156 // registers used and is in the range 0 - 8 inclusive.
4157
4158 // Count the number of XMM registers allocated.
4159 static const MCPhysReg XMMArgRegs[] = {
4160 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4161 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4162 };
4163 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4164    assert((Subtarget.hasSSE1() || !NumXMMRegs)
4165           && "SSE registers cannot be used when SSE is disabled");
4166 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4167 DAG.getConstant(NumXMMRegs, dl,
4168 MVT::i8)));
4169 }
4170
4171 if (isVarArg && IsMustTail) {
4172 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4173 for (const auto &F : Forwards) {
4174 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4175 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4176 }
4177 }
4178
4179 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4180 // don't need this because the eligibility check rejects calls that require
4181 // shuffling arguments passed in memory.
4182 if (!IsSibcall && isTailCall) {
4183 // Force all the incoming stack arguments to be loaded from the stack
4184 // before any new outgoing arguments are stored to the stack, because the
4185 // outgoing stack slots may alias the incoming argument stack slots, and
4186 // the alias isn't otherwise explicit. This is slightly more conservative
4187 // than necessary, because it means that each store effectively depends
4188 // on every argument instead of just those arguments it would clobber.
4189 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4190
4191 SmallVector<SDValue, 8> MemOpChains2;
4192 SDValue FIN;
4193 int FI = 0;
4194 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4195 ++I, ++OutsIndex) {
4196 CCValAssign &VA = ArgLocs[I];
4197
4198 if (VA.isRegLoc()) {
4199 if (VA.needsCustom()) {
4200          assert((CallConv == CallingConv::X86_RegCall) &&
4201                 "Expecting custom case only in regcall calling convention");
4202 // This means that we are in special case where one argument was
4203 // passed through two register locations - Skip the next location
4204 ++I;
4205 }
4206
4207 continue;
4208 }
4209
4210      assert(VA.isMemLoc());
4211 SDValue Arg = OutVals[OutsIndex];
4212 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4213 // Skip inalloca/preallocated arguments. They don't require any work.
4214 if (Flags.isInAlloca() || Flags.isPreallocated())
4215 continue;
4216 // Create frame index.
4217 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4218 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4219 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4220 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4221
4222 if (Flags.isByVal()) {
4223 // Copy relative to framepointer.
4224 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4225 if (!StackPtr.getNode())
4226 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4227 getPointerTy(DAG.getDataLayout()));
4228 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4229 StackPtr, Source);
4230
4231 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4232 ArgChain,
4233 Flags, DAG, dl));
4234 } else {
4235 // Store relative to framepointer.
4236 MemOpChains2.push_back(DAG.getStore(
4237 ArgChain, dl, Arg, FIN,
4238 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4239 }
4240 }
4241
4242 if (!MemOpChains2.empty())
4243 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4244
4245 // Store the return address to the appropriate stack slot.
4246 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4247 getPointerTy(DAG.getDataLayout()),
4248 RegInfo->getSlotSize(), FPDiff, dl);
4249 }
4250
4251 // Build a sequence of copy-to-reg nodes chained together with token chain
4252 // and flag operands which copy the outgoing args into registers.
4253 SDValue InFlag;
4254 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4255 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4256 RegsToPass[i].second, InFlag);
4257 InFlag = Chain.getValue(1);
4258 }
4259
4260 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4261    assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
4262 // In the 64-bit large code model, we have to make all calls
4263 // through a register, since the call instruction's 32-bit
4264 // pc-relative offset may not be large enough to hold the whole
4265 // address.
4266 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4267 Callee->getOpcode() == ISD::ExternalSymbol) {
4268 // Lower direct calls to global addresses and external symbols. Setting
4269 // ForCall to true here has the effect of removing WrapperRIP when possible
4270 // to allow direct calls to be selected without first materializing the
4271 // address into a register.
4272 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4273 } else if (Subtarget.isTarget64BitILP32() &&
4274 Callee->getValueType(0) == MVT::i32) {
4275 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4276 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4277 }
4278
4279 // Returns a chain & a flag for retval copy to use.
4280 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4281 SmallVector<SDValue, 8> Ops;
4282
4283 if (!IsSibcall && isTailCall && !IsMustTail) {
4284 Chain = DAG.getCALLSEQ_END(Chain,
4285 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4286 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4287 InFlag = Chain.getValue(1);
4288 }
4289
4290 Ops.push_back(Chain);
4291 Ops.push_back(Callee);
4292
4293 if (isTailCall)
4294 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4295
4296 // Add argument registers to the end of the list so that they are known live
4297 // into the call.
4298 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4299 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4300 RegsToPass[i].second.getValueType()));
4301
4302 // Add a register mask operand representing the call-preserved registers.
4303 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
4304 // set X86_INTR calling convention because it has the same CSR mask
4305 // (same preserved registers).
4306 const uint32_t *Mask = RegInfo->getCallPreservedMask(
4307 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
4308  assert(Mask && "Missing call preserved mask for calling convention");
4309
4310 // If this is an invoke in a 32-bit function using a funclet-based
4311 // personality, assume the function clobbers all registers. If an exception
4312 // is thrown, the runtime will not restore CSRs.
4313 // FIXME: Model this more precisely so that we can register allocate across
4314 // the normal edge and spill and fill across the exceptional edge.
4315 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4316 const Function &CallerFn = MF.getFunction();
4317 EHPersonality Pers =
4318 CallerFn.hasPersonalityFn()
4319 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4320 : EHPersonality::Unknown;
4321 if (isFuncletEHPersonality(Pers))
4322 Mask = RegInfo->getNoPreservedMask();
4323 }
4324
4325 // Define a new register mask from the existing mask.
4326 uint32_t *RegMask = nullptr;
4327
4328 // In some calling conventions we need to remove the used physical registers
4329 // from the reg mask.
4330 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4331 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4332
4333 // Allocate a new Reg Mask and copy Mask.
4334 RegMask = MF.allocateRegMask();
4335 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4336 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4337
4338 // Make sure all sub registers of the argument registers are reset
4339 // in the RegMask.
4340 for (auto const &RegPair : RegsToPass)
4341 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4342 SubRegs.isValid(); ++SubRegs)
4343 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4344
4345 // Create the RegMask Operand according to our updated mask.
4346 Ops.push_back(DAG.getRegisterMask(RegMask));
4347 } else {
4348 // Create the RegMask Operand according to the static mask.
4349 Ops.push_back(DAG.getRegisterMask(Mask));
4350 }
4351
4352 if (InFlag.getNode())
4353 Ops.push_back(InFlag);
4354
4355 if (isTailCall) {
4356 // We used to do:
4357 //// If this is the first return lowered for this function, add the regs
4358 //// to the liveout set for the function.
4359 // This isn't right, although it's probably harmless on x86; liveouts
4360 // should be computed from returns not tail calls. Consider a void
4361 // function making a tail call to a function returning int.
4362 MF.getFrameInfo().setHasTailCall();
4363 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4364 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4365 return Ret;
4366 }
4367
4368 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4369 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4370 } else {
4371 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4372 }
4373 InFlag = Chain.getValue(1);
4374 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4375 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4376
4377 // Save heapallocsite metadata.
4378 if (CLI.CB)
4379 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4380 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4381
4382 // Create the CALLSEQ_END node.
4383 unsigned NumBytesForCalleeToPop;
4384 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4385 DAG.getTarget().Options.GuaranteedTailCallOpt))
4386 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4387 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4388 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4389 SR == StackStructReturn)
4390 // If this is a call to a struct-return function, the callee
4391 // pops the hidden struct pointer, so we have to push it back.
4392 // This is common for Darwin/X86, Linux & Mingw32 targets.
4393 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4394 NumBytesForCalleeToPop = 4;
4395 else
4396 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4397
4398 // Returns a flag for retval copy to use.
4399 if (!IsSibcall) {
4400 Chain = DAG.getCALLSEQ_END(Chain,
4401 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4402 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4403 true),
4404 InFlag, dl);
4405 InFlag = Chain.getValue(1);
4406 }
4407
4408 // Handle result values, copying them out of physregs into vregs that we
4409 // return.
4410 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4411 InVals, RegMask);
4412}
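
One detail inside LowerCall that benefits from a concrete illustration is the Win64 varargs shadow-register pairing: a vector argument passed in XMM0-XMM3 is also copied into RCX/RDX/R8/R9 so a prototype-less callee can spill it from the integer home slot. A stand-alone sketch using plain strings for the register names; the helper is invented for the example.

#include <cstdio>

// Returns the integer home register paired with XMM<Index>, or nullptr if the
// argument slot has no shadow register (only the first four do).
static const char *win64ShadowRegFor(unsigned XMMIndex) {
  static const char *const GPR[] = {"RCX", "RDX", "R8", "R9"};
  return XMMIndex < 4 ? GPR[XMMIndex] : nullptr;
}

int main() {
  for (unsigned i = 0; i < 5; ++i) {
    const char *Shadow = win64ShadowRegFor(i);
    std::printf("XMM%u -> %s\n", i, Shadow ? Shadow : "(none)");
  }
}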
4413
4414//===----------------------------------------------------------------------===//
4415// Fast Calling Convention (tail call) implementation
4416//===----------------------------------------------------------------------===//
4417
4418// Like stdcall, the callee cleans up the arguments, except that ECX is
4419// reserved for storing the tail-called function's address. Only 2 registers are
4420// free for argument passing (inreg). Tail call optimization is performed
4421// provided:
4422// * tailcallopt is enabled
4423// * caller/callee are fastcc
4424// On X86_64 architecture with GOT-style position independent code only local
4425// (within module) calls are supported at the moment.
4426// To keep the stack aligned according to platform abi the function
4427// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4428// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
4429// If a tail called function callee has more arguments than the caller the
4430// caller needs to make sure that there is room to move the RETADDR to. This is
4431// achieved by reserving an area the size of the argument delta right after the
4432// original RETADDR, but before the saved framepointer or the spilled registers
4433// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4434// stack layout:
4435// arg1
4436// arg2
4437// RETADDR
4438// [ new RETADDR
4439// move area ]
4440// (possible EBP)
4441// ESI
4442// EDI
4443// local1 ..
4444
4445/// Round the stack size up so that it is, e.g., 16n + 12 aligned for a
4446/// 16-byte alignment requirement.
4447unsigned
4448X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4449 SelectionDAG &DAG) const {
4450 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4451 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4452  assert(StackSize % SlotSize == 0 &&
4453         "StackSize must be a multiple of SlotSize");
4454 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4455}
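
A worked example of the formula above, assuming the 64-bit case of a 16-byte stack alignment and an 8-byte slot: alignTo(StackSize + SlotSize, 16) - SlotSize always lands on a value congruent to 8 mod 16, the 64-bit analogue of the "16n + 12" shape mentioned in the comment. alignToMultiple below is a local stand-in for llvm::alignTo, not the real helper.

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t alignToMultiple(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align; // round up to a multiple of Align
}

static uint64_t alignedArgStackSize(uint64_t StackSize, uint64_t SlotSize,
                                    uint64_t StackAlignment) {
  assert(StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize");
  return alignToMultiple(StackSize + SlotSize, StackAlignment) - SlotSize;
}

int main() {
  // SlotSize = 8, StackAlignment = 16 (typical x86-64 values).
  std::printf("%llu\n", (unsigned long long)alignedArgStackSize(0, 8, 16));  // 8
  std::printf("%llu\n", (unsigned long long)alignedArgStackSize(16, 8, 16)); // 24
  std::printf("%llu\n", (unsigned long long)alignedArgStackSize(40, 8, 16)); // 40
}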
4456
4457/// Return true if the given stack call argument is already available in the
4458/// same position (relatively) of the caller's incoming argument stack.
4459static
4460bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4461 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4462 const X86InstrInfo *TII, const CCValAssign &VA) {
4463 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4464
4465 for (;;) {
4466 // Look through nodes that don't alter the bits of the incoming value.
4467 unsigned Op = Arg.getOpcode();
4468 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4469 Arg = Arg.getOperand(0);
4470 continue;
4471 }
4472 if (Op == ISD::TRUNCATE) {
4473 const SDValue &TruncInput = Arg.getOperand(0);
4474 if (TruncInput.getOpcode() == ISD::AssertZext &&
4475 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4476 Arg.getValueType()) {
4477 Arg = TruncInput.getOperand(0);
4478 continue;
4479 }
4480 }
4481 break;
4482 }
4483
4484  int FI = INT_MAX;
4485 if (Arg.getOpcode() == ISD::CopyFromReg) {
4486 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4487 if (!VR.isVirtual())
4488 return false;
4489 MachineInstr *Def = MRI->getVRegDef(VR);
4490 if (!Def)
4491 return false;
4492 if (!Flags.isByVal()) {
4493 if (!TII->isLoadFromStackSlot(*Def, FI))
4494 return false;
4495 } else {
4496 unsigned Opcode = Def->getOpcode();
4497 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4498 Opcode == X86::LEA64_32r) &&
4499 Def->getOperand(1).isFI()) {
4500 FI = Def->getOperand(1).getIndex();
4501 Bytes = Flags.getByValSize();
4502 } else
4503 return false;
4504 }
4505 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4506 if (Flags.isByVal())
4507 // ByVal argument is passed in as a pointer but it's now being
4508 // dereferenced. e.g.
4509 // define @foo(%struct.X* %A) {
4510 // tail call @bar(%struct.X* byval %A)
4511 // }
4512 return false;
4513 SDValue Ptr = Ld->getBasePtr();
4514 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4515 if (!FINode)
4516 return false;
4517 FI = FINode->getIndex();
4518 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4519 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4520 FI = FINode->getIndex();
4521 Bytes = Flags.getByValSize();
4522 } else
4523 return false;
4524
4525  assert(FI != INT_MAX);
4526 if (!MFI.isFixedObjectIndex(FI))
4527 return false;
4528
4529 if (Offset != MFI.getObjectOffset(FI))
4530 return false;
4531
4532 // If this is not byval, check that the argument stack object is immutable.
4533 // inalloca and argument copy elision can create mutable argument stack
4534 // objects. Byval objects can be mutated, but a byval call intends to pass the
4535 // mutated memory.
4536 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4537 return false;
4538
4539 if (VA.getLocVT().getFixedSizeInBits() >
4540 Arg.getValueSizeInBits().getFixedSize()) {
4541 // If the argument location is wider than the argument type, check that any
4542 // extension flags match.
4543 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4544 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4545 return false;
4546 }
4547 }
4548
4549 return Bytes == MFI.getObjectSize(FI);
4550}
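
Boiled down, the tail of MatchingStackOffset asks three questions about the fixed stack object found above: same offset, still immutable (unless byval), and exactly the same size. A condensed sketch with an invented struct and helper; it omits the node walking and the zext/sext flag checks.

#include <cstdio>

// Flattened view of one fixed object in the caller's incoming argument area.
struct FixedArgObject {
  int Offset;     // offset recorded in the frame info
  unsigned Size;  // object size in bytes
  bool Immutable; // inalloca / argument copy elision can make it mutable
};

static bool matchesIncomingSlot(unsigned ArgBytes, int ArgOffset, bool IsByVal,
                                const FixedArgObject &Obj) {
  if (ArgOffset != Obj.Offset)
    return false;                // must sit at exactly the same offset
  if (!IsByVal && !Obj.Immutable)
    return false;                // a possibly mutated slot cannot be reused
  return ArgBytes == Obj.Size;   // and the sizes must agree exactly
}

int main() {
  FixedArgObject Slot = {16, 8, true};
  std::printf("%d\n", matchesIncomingSlot(8, 16, false, Slot)); // 1: reusable
  std::printf("%d\n", matchesIncomingSlot(4, 16, false, Slot)); // 0: size differs
}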
4551
4552/// Check whether the call is eligible for tail call optimization. Targets
4553/// that want to do tail call optimization should implement this function.
4554bool X86TargetLowering::IsEligibleForTailCallOptimization(
4555 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4556 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4557 const SmallVectorImpl<ISD::OutputArg> &Outs,
4558 const SmallVectorImpl<SDValue> &OutVals,
4559 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4560 if (!mayTailCallThisCC(CalleeCC))
4561 return false;
4562
4563 // If -tailcallopt is specified, make fastcc functions tail-callable.
4564 MachineFunction &MF = DAG.getMachineFunction();
4565 const Function &CallerF = MF.getFunction();
4566
4567 // If the function return type is x86_fp80 and the callee return type is not,
4568 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4569 // perform a tailcall optimization here.
4570 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4571 return false;
4572
4573 CallingConv::ID CallerCC = CallerF.getCallingConv();
4574 bool CCMatch = CallerCC == CalleeCC;
4575 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4576 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4577 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4578 CalleeCC == CallingConv::Tail;
4579
4580 // Win64 functions have extra shadow space for argument homing. Don't do the
4581 // sibcall if the caller and callee have mismatched expectations for this
4582 // space.
4583 if (IsCalleeWin64 != IsCallerWin64)
4584 return false;
4585
4586 if (IsGuaranteeTCO) {
4587 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4588 return true;
4589 return false;
4590 }
4591
4592 // Look for obvious safe cases to perform tail call optimization that do not
4593 // require ABI changes. This is what gcc calls sibcall.
4594
4595 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4596 // emit a special epilogue.
4597 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4598 if (RegInfo->needsStackRealignment(MF))
4599 return false;
4600
4601 // Also avoid sibcall optimization if either caller or callee uses struct
4602 // return semantics.
4603 if (isCalleeStructRet || isCallerStructRet)
4604 return false;
4605
4606 // Do not sibcall optimize vararg calls unless all arguments are passed via
4607 // registers.
4608 LLVMContext &C = *DAG.getContext();
4609 if (isVarArg && !Outs.empty()) {
4610 // Optimizing for varargs on Win64 is unlikely to be safe without
4611 // additional testing.
4612 if (IsCalleeWin64 || IsCallerWin64)
4613 return false;
4614
4615 SmallVector<CCValAssign, 16> ArgLocs;
4616 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4617
4618 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4619 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4620 if (!ArgLocs[i].isRegLoc())
4621 return false;
4622 }
4623
4624 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4625 // stack. Therefore, if it's not used by the call it is not safe to optimize
4626 // this into a sibcall.
4627 bool Unused = false;
4628 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4629 if (!Ins[i].Used) {
4630 Unused = true;
4631 break;
4632 }
4633 }
4634 if (Unused) {
4635 SmallVector<CCValAssign, 16> RVLocs;
4636 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4637 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4638 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4639 CCValAssign &VA = RVLocs[i];
4640 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4641 return false;
4642 }
4643 }
4644
4645 // Check that the call results are passed in the same way.
4646 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4647 RetCC_X86, RetCC_X86))
4648 return false;
4649 // The callee has to preserve all registers the caller needs to preserve.
4650 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4651 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4652 if (!CCMatch) {
4653 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4654 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4655 return false;
4656 }
4657
4658 unsigned StackArgsSize = 0;
4659
4660 // If the callee takes no arguments then go on to check the results of the
4661 // call.
4662 if (!Outs.empty()) {
4663 // Check if stack adjustment is needed. For now, do not do this if any
4664 // argument is passed on the stack.
4665 SmallVector<CCValAssign, 16> ArgLocs;
4666 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4667
4668 // Allocate shadow area for Win64
4669 if (IsCalleeWin64)
4670 CCInfo.AllocateStack(32, Align(8));
4671
4672 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4673 StackArgsSize = CCInfo.getNextStackOffset();
4674
4675 if (CCInfo.getNextStackOffset()) {
4676 // Check if the arguments are already laid out in the right way as
4677 // the caller's fixed stack objects.
4678 MachineFrameInfo &MFI = MF.getFrameInfo();
4679 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4680 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4681 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4682 CCValAssign &VA = ArgLocs[i];
4683 SDValue Arg = OutVals[i];
4684 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4685 if (VA.getLocInfo() == CCValAssign::Indirect)
4686 return false;
4687 if (!VA.isRegLoc()) {
4688 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4689 MFI, MRI, TII, VA))
4690 return false;
4691 }
4692 }
4693 }
4694
4695 bool PositionIndependent = isPositionIndependent();
4696 // If the tailcall address may be in a register, then make sure it's
4697 // possible to register allocate for it. In 32-bit, the call address can
4698 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4699 // callee-saved registers are restored. These happen to be the same
4700 // registers used to pass 'inreg' arguments so watch out for those.
4701 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4702 !isa<ExternalSymbolSDNode>(Callee)) ||
4703 PositionIndependent)) {
4704 unsigned NumInRegs = 0;
4705 // In PIC we need an extra register to formulate the address computation
4706 // for the callee.
4707 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4708
4709 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4710 CCValAssign &VA = ArgLocs[i];
4711 if (!VA.isRegLoc())
4712 continue;
4713 Register Reg = VA.getLocReg();
4714 switch (Reg) {
4715 default: break;
4716 case X86::EAX: case X86::EDX: case X86::ECX:
4717 if (++NumInRegs == MaxInRegs)
4718 return false;
4719 break;
4720 }
4721 }
4722 }
4723
4724 const MachineRegisterInfo &MRI = MF.getRegInfo();
4725 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4726 return false;
4727 }
4728
4729 bool CalleeWillPop =
4730 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4731 MF.getTarget().Options.GuaranteedTailCallOpt);
4732
4733 if (unsigned BytesToPop =
4734 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4735 // If we have bytes to pop, the callee must pop them.
4736 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4737 if (!CalleePopMatches)
4738 return false;
4739 } else if (CalleeWillPop && StackArgsSize > 0) {
4740 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4741 return false;
4742 }
4743
4744 return true;
4745}
4746
4747FastISel *
4748X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4749 const TargetLibraryInfo *libInfo) const {
4750 return X86::createFastISel(funcInfo, libInfo);
4751}
4752
4753//===----------------------------------------------------------------------===//
4754// Other Lowering Hooks
4755//===----------------------------------------------------------------------===//
4756
4757static bool MayFoldLoad(SDValue Op) {
4758 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4759}
4760
4761static bool MayFoldIntoStore(SDValue Op) {
4762 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4763}
4764
4765static bool MayFoldIntoZeroExtend(SDValue Op) {
4766 if (Op.hasOneUse()) {
4767 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4768 return (ISD::ZERO_EXTEND == Opcode);
4769 }
4770 return false;
4771}
4772
4773static bool isTargetShuffle(unsigned Opcode) {
4774 switch(Opcode) {
4775 default: return false;
4776 case X86ISD::BLENDI:
4777 case X86ISD::PSHUFB:
4778 case X86ISD::PSHUFD:
4779 case X86ISD::PSHUFHW:
4780 case X86ISD::PSHUFLW:
4781 case X86ISD::SHUFP:
4782 case X86ISD::INSERTPS:
4783 case X86ISD::EXTRQI:
4784 case X86ISD::INSERTQI:
4785 case X86ISD::VALIGN:
4786 case X86ISD::PALIGNR:
4787 case X86ISD::VSHLDQ:
4788 case X86ISD::VSRLDQ:
4789 case X86ISD::MOVLHPS:
4790 case X86ISD::MOVHLPS:
4791 case X86ISD::MOVSHDUP:
4792 case X86ISD::MOVSLDUP:
4793 case X86ISD::MOVDDUP:
4794 case X86ISD::MOVSS:
4795 case X86ISD::MOVSD:
4796 case X86ISD::UNPCKL:
4797 case X86ISD::UNPCKH:
4798 case X86ISD::VBROADCAST:
4799 case X86ISD::VPERMILPI:
4800 case X86ISD::VPERMILPV:
4801 case X86ISD::VPERM2X128:
4802 case X86ISD::SHUF128:
4803 case X86ISD::VPERMIL2:
4804 case X86ISD::VPERMI:
4805 case X86ISD::VPPERM:
4806 case X86ISD::VPERMV:
4807 case X86ISD::VPERMV3:
4808 case X86ISD::VZEXT_MOVL:
4809 return true;
4810 }
4811}
4812
4813static bool isTargetShuffleVariableMask(unsigned Opcode) {
4814 switch (Opcode) {
4815 default: return false;
4816 // Target Shuffles.
4817 case X86ISD::PSHUFB:
4818 case X86ISD::VPERMILPV:
4819 case X86ISD::VPERMIL2:
4820 case X86ISD::VPPERM:
4821 case X86ISD::VPERMV:
4822 case X86ISD::VPERMV3:
4823 return true;
4824 // 'Faux' Target Shuffles.
4825 case ISD::OR:
4826 case ISD::AND:
4827 case X86ISD::ANDNP:
4828 return true;
4829 }
4830}
4831
4832static bool isTargetShuffleSplat(SDValue Op) {
4833 unsigned Opcode = Op.getOpcode();
4834 if (Opcode == ISD::EXTRACT_SUBVECTOR)
4835 return isTargetShuffleSplat(Op.getOperand(0));
4836 return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
4837}
4838
4839SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4840 MachineFunction &MF = DAG.getMachineFunction();
4841 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4842 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4843 int ReturnAddrIndex = FuncInfo->getRAIndex();
4844
4845 if (ReturnAddrIndex == 0) {
4846 // Set up a frame object for the return address.
4847 unsigned SlotSize = RegInfo->getSlotSize();
4848 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4849 -(int64_t)SlotSize,
4850 false);
4851 FuncInfo->setRAIndex(ReturnAddrIndex);
4852 }
4853
4854 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4855}
4856
4857bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4858 bool hasSymbolicDisplacement) {
4859 // Offset should fit into 32 bit immediate field.
4860 if (!isInt<32>(Offset))
4861 return false;
4862
4863 // If we don't have a symbolic displacement - we don't have any extra
4864 // restrictions.
4865 if (!hasSymbolicDisplacement)
4866 return true;
4867
4868 // FIXME: Some tweaks might be needed for medium code model.
4869 if (M != CodeModel::Small && M != CodeModel::Kernel)
4870 return false;
4871
4872 // For the small code model we assume that the last object lies at least 16MB
4873 // below the 2^31 boundary. We may also accept pretty large negative constants,
4874 // knowing that all objects are in the positive half of the address space.
4875 if (M == CodeModel::Small && Offset < 16*1024*1024)
4876 return true;
4877
4878 // For the kernel code model we know that all objects reside in the negative
4879 // half of the 32-bit address space. We must not accept negative offsets, since
4880 // they may be just off, but we may accept pretty large positive ones.
4881 if (M == CodeModel::Kernel && Offset >= 0)
4882 return true;
4883
4884 return false;
4885}
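
To make the branch structure above concrete, here is a minimal sketch (illustrative calls only, assuming the CodeModel enumeration from llvm/Support/CodeGen.h) of how the predicate resolves for a few sample offsets:

  bool A = X86::isOffsetSuitableForCodeModel(8 << 20, CodeModel::Small, true);   // true: within the 16MB slack
  bool B = X86::isOffsetSuitableForCodeModel(32 << 20, CodeModel::Small, true);  // false: past the 16MB slack
  bool C = X86::isOffsetSuitableForCodeModel(-4096, CodeModel::Kernel, true);    // false: kernel model rejects negative offsets
  bool D = X86::isOffsetSuitableForCodeModel(-4096, CodeModel::Large, false);    // true: no symbolic displacement, only isInt<32> matters
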
4886
4887/// Determines whether the callee is required to pop its own arguments.
4888/// Callee pop is necessary to support tail calls.
4889bool X86::isCalleePop(CallingConv::ID CallingConv,
4890 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4891 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4892 // can guarantee TCO.
4893 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4894 return true;
4895
4896 switch (CallingConv) {
4897 default:
4898 return false;
4899 case CallingConv::X86_StdCall:
4900 case CallingConv::X86_FastCall:
4901 case CallingConv::X86_ThisCall:
4902 case CallingConv::X86_VectorCall:
4903 return !is64Bit;
4904 }
4905}
4906
4907 /// Return true if the condition is a signed comparison operation.
4908static bool isX86CCSigned(unsigned X86CC) {
4909 switch (X86CC) {
4910 default:
4911 llvm_unreachable("Invalid integer condition!");
4912 case X86::COND_E:
4913 case X86::COND_NE:
4914 case X86::COND_B:
4915 case X86::COND_A:
4916 case X86::COND_BE:
4917 case X86::COND_AE:
4918 return false;
4919 case X86::COND_G:
4920 case X86::COND_GE:
4921 case X86::COND_L:
4922 case X86::COND_LE:
4923 return true;
4924 }
4925}
4926
4927static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4928 switch (SetCCOpcode) {
4929 default: llvm_unreachable("Invalid integer condition!");
4930 case ISD::SETEQ: return X86::COND_E;
4931 case ISD::SETGT: return X86::COND_G;
4932 case ISD::SETGE: return X86::COND_GE;
4933 case ISD::SETLT: return X86::COND_L;
4934 case ISD::SETLE: return X86::COND_LE;
4935 case ISD::SETNE: return X86::COND_NE;
4936 case ISD::SETULT: return X86::COND_B;
4937 case ISD::SETUGT: return X86::COND_A;
4938 case ISD::SETULE: return X86::COND_BE;
4939 case ISD::SETUGE: return X86::COND_AE;
4940 }
4941}
4942
4943 /// Do a one-to-one translation of an ISD::CondCode to the X86-specific
4944/// condition code, returning the condition code and the LHS/RHS of the
4945/// comparison to make.
4946static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4947 bool isFP, SDValue &LHS, SDValue &RHS,
4948 SelectionDAG &DAG) {
4949 if (!isFP) {
4950 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4951 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4952 // X > -1 -> X == 0, jump !sign.
4953 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4954 return X86::COND_NS;
4955 }
4956 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4957 // X < 0 -> X == 0, jump on sign.
4958 return X86::COND_S;
4959 }
4960 if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
4961 // X >= 0 -> X == 0, jump on !sign.
4962 return X86::COND_NS;
4963 }
4964 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
4965 // X < 1 -> X <= 0
4966 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4967 return X86::COND_LE;
4968 }
4969 }
4970
4971 return TranslateIntegerX86CC(SetCCOpcode);
4972 }
4973
4974 // First determine if it is required or is profitable to flip the operands.
4975
4976 // If LHS is a foldable load, but RHS is not, flip the condition.
4977 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4978 !ISD::isNON_EXTLoad(RHS.getNode())) {
4979 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4980 std::swap(LHS, RHS);
4981 }
4982
4983 switch (SetCCOpcode) {
4984 default: break;
4985 case ISD::SETOLT:
4986 case ISD::SETOLE:
4987 case ISD::SETUGT:
4988 case ISD::SETUGE:
4989 std::swap(LHS, RHS);
4990 break;
4991 }
4992
4993 // On a floating point condition, the flags are set as follows:
4994 // ZF PF CF op
4995 // 0 | 0 | 0 | X > Y
4996 // 0 | 0 | 1 | X < Y
4997 // 1 | 0 | 0 | X == Y
4998 // 1 | 1 | 1 | unordered
4999 switch (SetCCOpcode) {
5000 default: llvm_unreachable("Condcode should be pre-legalized away");
5001 case ISD::SETUEQ:
5002 case ISD::SETEQ: return X86::COND_E;
5003 case ISD::SETOLT: // flipped
5004 case ISD::SETOGT:
5005 case ISD::SETGT: return X86::COND_A;
5006 case ISD::SETOLE: // flipped
5007 case ISD::SETOGE:
5008 case ISD::SETGE: return X86::COND_AE;
5009 case ISD::SETUGT: // flipped
5010 case ISD::SETULT:
5011 case ISD::SETLT: return X86::COND_B;
5012 case ISD::SETUGE: // flipped
5013 case ISD::SETULE:
5014 case ISD::SETLE: return X86::COND_BE;
5015 case ISD::SETONE:
5016 case ISD::SETNE: return X86::COND_NE;
5017 case ISD::SETUO: return X86::COND_P;
5018 case ISD::SETO: return X86::COND_NP;
5019 case ISD::SETOEQ:
5020 case ISD::SETUNE: return X86::COND_INVALID;
5021 }
5022}
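
A short summary sketch of the constant-RHS special cases handled above (traced from the code, shown here only for orientation):

  //   X >  -1 : RHS rewritten to 0, returns X86::COND_NS (sign bit clear)
  //   X <   0 : RHS left as 0,      returns X86::COND_S  (sign bit set)
  //   X >=  0 : RHS left as 0,      returns X86::COND_NS
  //   X <   1 : RHS rewritten to 0, returns X86::COND_LE
  // All other integer comparisons fall through to TranslateIntegerX86CC.
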
5023
5024/// Is there a floating point cmov for the specific X86 condition code?
5025 /// Current x86 ISA includes the following FP cmov instructions:
5026 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5027static bool hasFPCMov(unsigned X86CC) {
5028 switch (X86CC) {
5029 default:
5030 return false;
5031 case X86::COND_B:
5032 case X86::COND_BE:
5033 case X86::COND_E:
5034 case X86::COND_P:
5035 case X86::COND_A:
5036 case X86::COND_AE:
5037 case X86::COND_NE:
5038 case X86::COND_NP:
5039 return true;
5040 }
5041}
5042
5043
5044bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5045 const CallInst &I,
5046 MachineFunction &MF,
5047 unsigned Intrinsic) const {
5048 Info.flags = MachineMemOperand::MONone;
5049 Info.offset = 0;
5050
5051 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5052 if (!IntrData) {
5053 switch (Intrinsic) {
5054 case Intrinsic::x86_aesenc128kl:
5055 case Intrinsic::x86_aesdec128kl:
5056 Info.opc = ISD::INTRINSIC_W_CHAIN;
5057 Info.ptrVal = I.getArgOperand(1);
5058 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5059 Info.align = Align(1);
5060 Info.flags |= MachineMemOperand::MOLoad;
5061 return true;
5062 case Intrinsic::x86_aesenc256kl:
5063 case Intrinsic::x86_aesdec256kl:
5064 Info.opc = ISD::INTRINSIC_W_CHAIN;
5065 Info.ptrVal = I.getArgOperand(1);
5066 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5067 Info.align = Align(1);
5068 Info.flags |= MachineMemOperand::MOLoad;
5069 return true;
5070 case Intrinsic::x86_aesencwide128kl:
5071 case Intrinsic::x86_aesdecwide128kl:
5072 Info.opc = ISD::INTRINSIC_W_CHAIN;
5073 Info.ptrVal = I.getArgOperand(0);
5074 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5075 Info.align = Align(1);
5076 Info.flags |= MachineMemOperand::MOLoad;
5077 return true;
5078 case Intrinsic::x86_aesencwide256kl:
5079 case Intrinsic::x86_aesdecwide256kl:
5080 Info.opc = ISD::INTRINSIC_W_CHAIN;
5081 Info.ptrVal = I.getArgOperand(0);
5082 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5083 Info.align = Align(1);
5084 Info.flags |= MachineMemOperand::MOLoad;
5085 return true;
5086 }
5087 return false;
5088 }
5089
5090 switch (IntrData->Type) {
5091 case TRUNCATE_TO_MEM_VI8:
5092 case TRUNCATE_TO_MEM_VI16:
5093 case TRUNCATE_TO_MEM_VI32: {
5094 Info.opc = ISD::INTRINSIC_VOID;
5095 Info.ptrVal = I.getArgOperand(0);
5096 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5097 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5098 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5099 ScalarVT = MVT::i8;
5100 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5101 ScalarVT = MVT::i16;
5102 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5103 ScalarVT = MVT::i32;
5104
5105 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5106 Info.align = Align(1);
5107 Info.flags |= MachineMemOperand::MOStore;
5108 break;
5109 }
5110 case GATHER:
5111 case GATHER_AVX2: {
5112 Info.opc = ISD::INTRINSIC_W_CHAIN;
5113 Info.ptrVal = nullptr;
5114 MVT DataVT = MVT::getVT(I.getType());
5115 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5116 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5117 IndexVT.getVectorNumElements());
5118 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5119 Info.align = Align(1);
5120 Info.flags |= MachineMemOperand::MOLoad;
5121 break;
5122 }
5123 case SCATTER: {
5124 Info.opc = ISD::INTRINSIC_VOID;
5125 Info.ptrVal = nullptr;
5126 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5127 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5128 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5129 IndexVT.getVectorNumElements());
5130 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5131 Info.align = Align(1);
5132 Info.flags |= MachineMemOperand::MOStore;
5133 break;
5134 }
5135 default:
5136 return false;
5137 }
5138
5139 return true;
5140}
5141
5142/// Returns true if the target can instruction select the
5143/// specified FP immediate natively. If false, the legalizer will
5144/// materialize the FP immediate as a load from a constant pool.
5145bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5146 bool ForCodeSize) const {
5147 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
5148 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
5149 return true;
5150 }
5151 return false;
5152}
5153
5154bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5155 ISD::LoadExtType ExtTy,
5156 EVT NewVT) const {
5157 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
5158
5159 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5160 // relocation must target a movq or addq instruction: don't let the load shrink.
5161 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5162 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5163 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5164 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5165
5166 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5167 // those uses are extracted directly into a store, then the extract + store
5168 // can be store-folded. Therefore, it's probably not worth splitting the load.
5169 EVT VT = Load->getValueType(0);
5170 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5171 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5172 // Skip uses of the chain value. Result 0 of the node is the load value.
5173 if (UI.getUse().getResNo() != 0)
5174 continue;
5175
5176 // If this use is not an extract + store, it's probably worth splitting.
5177 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5178 UI->use_begin()->getOpcode() != ISD::STORE)
5179 return true;
5180 }
5181 // All non-chain uses are extract + store.
5182 return false;
5183 }
5184
5185 return true;
5186}
5187
5188/// Returns true if it is beneficial to convert a load of a constant
5189/// to just the constant itself.
5190bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5191 Type *Ty) const {
5192 assert(Ty->isIntegerTy());
5193
5194 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5195 if (BitSize == 0 || BitSize > 64)
5196 return false;
5197 return true;
5198}
5199
5200bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5201 // If we are using XMM registers in the ABI and the condition of the select is
5202 // a floating-point compare and we have blendv or conditional move, then it is
5203 // cheaper to select instead of doing a cross-register move and creating a
5204 // load that depends on the compare result.
5205 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5206 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5207}
5208
5209bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5210 // TODO: It might be a win to ease or lift this restriction, but the generic
5211 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5212 if (VT.isVector() && Subtarget.hasAVX512())
5213 return false;
5214
5215 return true;
5216}
5217
5218bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5219 SDValue C) const {
5220 // TODO: We handle scalars using custom code, but generic combining could make
5221 // that unnecessary.
5222 APInt MulC;
5223 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5224 return false;
5225
5226 // Find the type this will be legalized to. Otherwise we might prematurely
5227 // convert this to shl+add/sub and then still have to type legalize those ops.
5228 // Another choice would be to defer the decision for illegal types until
5229 // after type legalization. But constant splat vectors of i64 can't make it
5230 // through type legalization on 32-bit targets so we would need to special
5231 // case vXi64.
5232 while (getTypeAction(Context, VT) != TypeLegal)
5233 VT = getTypeToTransformTo(Context, VT);
5234
5235 // If vector multiply is legal, assume that's faster than shl + add/sub.
5236 // TODO: Multiply is a complex op with higher latency and lower throughput in
5237 // most implementations, so this check could be loosened based on type
5238 // and/or a CPU attribute.
5239 if (isOperationLegal(ISD::MUL, VT))
5240 return false;
5241
5242 // shl+add, shl+sub, shl+add+neg
5243 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5244 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5245}
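
As a hand-worked illustration of the final power-of-two test (assuming a vector type for which ISD::MUL is not legal, so the early return is not taken):

  // MulC =  5 : MulC - 1 == 4 is a power of two  -> true  (x * 5  == (x << 2) + x)
  // MulC =  9 : MulC - 1 == 8 is a power of two  -> true  (x * 9  == (x << 3) + x)
  // MulC = -3 : 1 - MulC == 4 is a power of two  -> true  (x * -3 == x - (x << 2))
  // MulC =  6 : 7, 5, -5 and -7 are not powers of two -> false, keep the multiply
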
5246
5247bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5248 unsigned Index) const {
5249 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5250 return false;
5251
5252 // Mask vectors support all subregister combinations and operations that
5253 // extract half of vector.
5254 if (ResVT.getVectorElementType() == MVT::i1)
5255 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5256 (Index == ResVT.getVectorNumElements()));
5257
5258 return (Index % ResVT.getVectorNumElements()) == 0;
5259}
5260
5261bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5262 unsigned Opc = VecOp.getOpcode();
5263
5264 // Assume target opcodes can't be scalarized.
5265 // TODO - do we have any exceptions?
5266 if (Opc >= ISD::BUILTIN_OP_END)
5267 return false;
5268
5269 // If the vector op is not supported, try to convert to scalar.
5270 EVT VecVT = VecOp.getValueType();
5271 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5272 return true;
5273
5274 // If the vector op is supported, but the scalar op is not, the transform may
5275 // not be worthwhile.
5276 EVT ScalarVT = VecVT.getScalarType();
5277 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5278}
5279
5280bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5281 bool) const {
5282 // TODO: Allow vectors?
5283 if (VT.isVector())
5284 return false;
5285 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5286}
5287
5288bool X86TargetLowering::isCheapToSpeculateCttz() const {
5289 // Speculate cttz only if we can directly use TZCNT.
5290 return Subtarget.hasBMI();
5291}
5292
5293bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5294 // Speculate ctlz only if we can directly use LZCNT.
5295 return Subtarget.hasLZCNT();
5296}
5297
5298bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5299 const SelectionDAG &DAG,
5300 const MachineMemOperand &MMO) const {
5301 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5302 BitcastVT.getVectorElementType() == MVT::i1)
5303 return false;
5304
5305 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5306 return false;
5307
5308 // If both types are legal vectors, it's always ok to convert them.
5309 if (LoadVT.isVector() && BitcastVT.isVector() &&
5310 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5311 return true;
5312
5313 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5314}
5315
5316bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5317 const SelectionDAG &DAG) const {
5318 // Do not merge to float value size (128 bits) if no implicit
5319 // float attribute is set.
5320 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
5321 Attribute::NoImplicitFloat);
5322
5323 if (NoFloat) {
5324 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5325 return (MemVT.getSizeInBits() <= MaxIntSize);
5326 }
5327 // Make sure we don't merge greater than our preferred vector
5328 // width.
5329 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5330 return false;
5331 return true;
5332}
5333
5334bool X86TargetLowering::isCtlzFast() const {
5335 return Subtarget.hasFastLZCNT();
5336}
5337
5338bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5339 const Instruction &AndI) const {
5340 return true;
5341}
5342
5343bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5344 EVT VT = Y.getValueType();
5345
5346 if (VT.isVector())
5347 return false;
5348
5349 if (!Subtarget.hasBMI())
5350 return false;
5351
5352 // There are only 32-bit and 64-bit forms for 'andn'.
5353 if (VT != MVT::i32 && VT != MVT::i64)
5354 return false;
5355
5356 return !isa<ConstantSDNode>(Y);
5357}
5358
5359bool X86TargetLowering::hasAndNot(SDValue Y) const {
5360 EVT VT = Y.getValueType();
5361
5362 if (!VT.isVector())
5363 return hasAndNotCompare(Y);
5364
5365 // Vector.
5366
5367 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5368 return false;
5369
5370 if (VT == MVT::v4i32)
5371 return true;
5372
5373 return Subtarget.hasSSE2();
5374}
5375
5376bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5377 return X.getValueType().isScalarInteger(); // 'bt'
5378}
5379
5380bool X86TargetLowering::
5381 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5382 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5383 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5384 SelectionDAG &DAG) const {
5385 // Does baseline recommend not to perform the fold by default?
5386 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5387 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5388 return false;
5389 // For scalars this transform is always beneficial.
5390 if (X.getValueType().isScalarInteger())
5391 return true;
5392 // If all the shift amounts are identical, then transform is beneficial even
5393 // with rudimentary SSE2 shifts.
5394 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5395 return true;
5396 // If we have AVX2 with its powerful shift operations, then it's also good.
5397 if (Subtarget.hasAVX2())
5398 return true;
5399 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5400 return NewShiftOpcode == ISD::SHL;
5401}
5402
5403bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5404 const SDNode *N, CombineLevel Level) const {
5405 assert(((N->getOpcode() == ISD::SHL &&
5406 N->getOperand(0).getOpcode() == ISD::SRL) ||
5407 (N->getOpcode() == ISD::SRL &&
5408 N->getOperand(0).getOpcode() == ISD::SHL)) &&
5409 "Expected shift-shift mask");
5410 EVT VT = N->getValueType(0);
5411 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5412 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5413 // Only fold if the shift values are equal - so it folds to AND.
5414 // TODO - we should fold if either is a non-uniform vector but we don't do
5415 // the fold for non-splats yet.
5416 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5417 }
5418 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5419}
5420
5421bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5422 EVT VT = Y.getValueType();
5423
5424 // For vectors, we don't have a preference, but we probably want a mask.
5425 if (VT.isVector())
5426 return false;
5427
5428 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5429 if (VT == MVT::i64 && !Subtarget.is64Bit())
5430 return false;
5431
5432 return true;
5433}
5434
5435bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5436 SDNode *N) const {
5437 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5438 !Subtarget.isOSWindows())
5439 return false;
5440 return true;
5441}
5442
5443bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5444 // Any legal vector type can be splatted more efficiently than
5445 // loading/spilling from memory.
5446 return isTypeLegal(VT);
5447}
5448
5449MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5450 MVT VT = MVT::getIntegerVT(NumBits);
5451 if (isTypeLegal(VT))
5452 return VT;
5453
5454 // PMOVMSKB can handle this.
5455 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5456 return MVT::v16i8;
5457
5458 // VPMOVMSKB can handle this.
5459 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5460 return MVT::v32i8;
5461
5462 // TODO: Allow 64-bit type for 32-bit target.
5463 // TODO: 512-bit types should be allowed, but make sure that those
5464 // cases are handled in combineVectorSizedSetCCEquality().
5465
5466 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5467}
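
For orientation, on a typical SSE2/AVX2 subtarget the hook above would resolve roughly as follows (a sketch; the exact answers depend on which types isTypeLegal accepts):

  // NumBits =  32 -> MVT::i32  (plain integer compare)
  // NumBits = 128 -> MVT::v16i8 when v16i8 is legal (pcmpeqb + pmovmskb)
  // NumBits = 256 -> MVT::v32i8 when AVX2 makes v32i8 legal (vpcmpeqb + vpmovmskb)
  // NumBits = 512 -> MVT::INVALID_SIMPLE_VALUE_TYPE (not handled yet, see the TODO)
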
5468
5469/// Val is the undef sentinel value or equal to the specified value.
5470static bool isUndefOrEqual(int Val, int CmpVal) {
5471 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5472}
5473
5474 /// Return true if every element in Mask is the undef sentinel value or equal to
5475 /// the specified value.
5476static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
5477 return llvm::all_of(Mask, [CmpVal](int M) {
5478 return (M == SM_SentinelUndef) || (M == CmpVal);
5479 });
5480}
5481
5482/// Val is either the undef or zero sentinel value.
5483static bool isUndefOrZero(int Val) {
5484 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5485}
5486
5487/// Return true if every element in Mask, beginning from position Pos and ending
5488/// in Pos+Size is the undef sentinel value.
5489static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5490 return llvm::all_of(Mask.slice(Pos, Size),
5491 [](int M) { return M == SM_SentinelUndef; });
5492}
5493
5494/// Return true if the mask creates a vector whose lower half is undefined.
5495static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5496 unsigned NumElts = Mask.size();
5497 return isUndefInRange(Mask, 0, NumElts / 2);
5498}
5499
5500/// Return true if the mask creates a vector whose upper half is undefined.
5501static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5502 unsigned NumElts = Mask.size();
5503 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5504}
5505
5506 /// Return true if Val falls within the specified range [Low, Hi).
5507static bool isInRange(int Val, int Low, int Hi) {
5508 return (Val >= Low && Val < Hi);
5509}
5510
5511/// Return true if the value of any element in Mask falls within the specified
5512 /// range [Low, Hi).
5513static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5514 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5515}
5516
5517/// Return true if the value of any element in Mask is the zero sentinel value.
5518static bool isAnyZero(ArrayRef<int> Mask) {
5519 return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
5520}
5521
5522/// Return true if the value of any element in Mask is the zero or undef
5523/// sentinel values.
5524static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
5525 return llvm::any_of(Mask, [](int M) {
5526 return M == SM_SentinelZero || M == SM_SentinelUndef;
5527 });
5528}
5529
5530/// Return true if Val is undef or if its value falls within the
5531 /// specified range [Low, Hi).
5532static bool isUndefOrInRange(int Val, int Low, int Hi) {
5533 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5534}
5535
5536/// Return true if every element in Mask is undef or if its value
5537 /// falls within the specified range [Low, Hi).
5538static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5539 return llvm::all_of(
5540 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5541}
5542
5543/// Return true if Val is undef, zero or if its value falls within the
5544 /// specified range [Low, Hi).
5545static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5546 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5547}
5548
5549/// Return true if every element in Mask is undef, zero or if its value
5550 /// falls within the specified range [Low, Hi).
5551static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5552 return llvm::all_of(
5553 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5554}
5555
5556/// Return true if every element in Mask, beginning
5557/// from position Pos and ending in Pos + Size, falls within the specified
5558/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5559static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5560 unsigned Size, int Low, int Step = 1) {
5561 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5562 if (!isUndefOrEqual(Mask[i], Low))
5563 return false;
5564 return true;
5565}
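
A few hand-worked mask slices (illustrative values only) showing what the sequential check above accepts:

  // Mask = {0, -1, 2, 3}, Pos = 0, Size = 4, Low = 0            -> true  (-1 is the undef sentinel)
  // Mask = {4, 5, 6, 7},  Pos = 0, Size = 4, Low = 4            -> true
  // Mask = {0, 2, 4, 6},  Pos = 0, Size = 4, Low = 0, Step = 2  -> true
  // Mask = {0, 2, 1, 3},  Pos = 0, Size = 4, Low = 0            -> false (second element is 2, expected 1)
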
5566
5567/// Return true if every element in Mask, beginning
5568/// from position Pos and ending in Pos+Size, falls within the specified
5569/// sequential range (Low, Low+Size], or is undef or is zero.
5570static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5571 unsigned Size, int Low,
5572 int Step = 1) {
5573 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5574 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5575 return false;
5576 return true;
5577}
5578
5579/// Return true if every element in Mask, beginning
5580/// from position Pos and ending in Pos+Size is undef or is zero.
5581static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5582 unsigned Size) {
5583 return llvm::all_of(Mask.slice(Pos, Size),
5584 [](int M) { return isUndefOrZero(M); });
5585}
5586
5587/// Helper function to test whether a shuffle mask could be
5588/// simplified by widening the elements being shuffled.
5589///
5590/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5591/// leaves it in an unspecified state.
5592///
5593/// NOTE: This must handle normal vector shuffle masks and *target* vector
5594/// shuffle masks. The latter have the special property of a '-2' representing
5595/// a zero-ed lane of a vector.
5596static bool canWidenShuffleElements(ArrayRef<int> Mask,
5597 SmallVectorImpl<int> &WidenedMask) {
5598 WidenedMask.assign(Mask.size() / 2, 0);
5599 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5600 int M0 = Mask[i];
5601 int M1 = Mask[i + 1];
5602
5603 // If both elements are undef, it's trivial.
5604 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5605 WidenedMask[i / 2] = SM_SentinelUndef;
5606 continue;
5607 }
5608
5609 // Check for an undef mask and a mask value properly aligned to fit with
5610 // a pair of values. If we find such a case, use the non-undef mask's value.
5611 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5612 WidenedMask[i / 2] = M1 / 2;
5613 continue;
5614 }
5615 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5616 WidenedMask[i / 2] = M0 / 2;
5617 continue;
5618 }
5619
5620 // When zeroing, we need to spread the zeroing across both lanes to widen.
5621 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5622 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5623 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5624 WidenedMask[i / 2] = SM_SentinelZero;
5625 continue;
5626 }
5627 return false;
5628 }
5629
5630 // Finally check if the two mask values are adjacent and aligned with
5631 // a pair.
5632 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5633 WidenedMask[i / 2] = M0 / 2;
5634 continue;
5635 }
5636
5637 // Otherwise we can't safely widen the elements used in this shuffle.
5638 return false;
5639 }
5640 assert(WidenedMask.size() == Mask.size() / 2 &&
5641 "Incorrect size of mask after widening the elements!");
5642
5643 return true;
5644}
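
A few hand-worked masks (illustrative only) showing when the pairwise widening above succeeds:

  // {0, 1, 2, 3}                                -> {0, 1}                 (two aligned pairs)
  // {2, 3, SM_SentinelUndef, SM_SentinelUndef}  -> {1, SM_SentinelUndef}
  // {SM_SentinelZero, SM_SentinelZero, 4, 5}    -> {SM_SentinelZero, 2}
  // {1, 2, 4, 5}                                -> fails: 1 and 2 straddle a pair boundary
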
5645
5646static bool canWidenShuffleElements(ArrayRef<int> Mask,
5647 const APInt &Zeroable,
5648 bool V2IsZero,
5649 SmallVectorImpl<int> &WidenedMask) {
5650 // Create an alternative mask with info about zeroable elements.
5651 // Here we do not set undef elements as zeroable.
5652 SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
5653 if (V2IsZero) {
5654 assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
5655 for (int i = 0, Size = Mask.size(); i != Size; ++i)
5656 if (Mask[i] != SM_SentinelUndef && Zeroable[i])
5657 ZeroableMask[i] = SM_SentinelZero;
5658 }
5659 return canWidenShuffleElements(ZeroableMask, WidenedMask);
5660}
5661
5662static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5663 SmallVector<int, 32> WidenedMask;
5664 return canWidenShuffleElements(Mask, WidenedMask);
5665}
5666
5667// Attempt to narrow/widen shuffle mask until it matches the target number of
5668// elements.
5669static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
5670 SmallVectorImpl<int> &ScaledMask) {
5671 unsigned NumSrcElts = Mask.size();
5672 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
5673 "Illegal shuffle scale factor");
5674
5675 // Narrowing is guaranteed to work.
5676 if (NumDstElts >= NumSrcElts) {
5677 int Scale = NumDstElts / NumSrcElts;
5678 llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask);
5679 return true;
5680 }
5681
5682 // We have to repeat the widening until we reach the target size, but we can
5683 // split out the first widening as it sets up ScaledMask for us.
5684 if (canWidenShuffleElements(Mask, ScaledMask)) {
5685 while (ScaledMask.size() > NumDstElts) {
5686 SmallVector<int, 16> WidenedMask;
5687 if (!canWidenShuffleElements(ScaledMask, WidenedMask))
5688 return false;
5689 ScaledMask = std::move(WidenedMask);
5690 }
5691 return true;
5692 }
5693
5694 return false;
5695}
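
As a quick illustration (hand-worked, relying on llvm::narrowShuffleMaskElts splitting each index into Scale consecutive indices):

  // Mask = {0, 1, 2, 3}, NumDstElts = 8 -> {0, 1, 2, 3, 4, 5, 6, 7}  (each index split in two)
  // Mask = {0, 1, 2, 3}, NumDstElts = 2 -> {0, 1}                    (widened once)
  // Mask = {0, 2, 1, 3}, NumDstElts = 2 -> fails (neither 0/2 nor 1/3 form an aligned pair)
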
5696
5697/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5698bool X86::isZeroNode(SDValue Elt) {
5699 return isNullConstant(Elt) || isNullFPConstant(Elt);
5700}
5701
5702// Build a vector of constants.
5703// Use an UNDEF node if MaskElt == -1.
5704// Split 64-bit constants in the 32-bit mode.
5705static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5706 const SDLoc &dl, bool IsMask = false) {
5707
5708 SmallVector<SDValue, 32> Ops;
5709 bool Split = false;
5710
5711 MVT ConstVecVT = VT;
5712 unsigned NumElts = VT.getVectorNumElements();
5713 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5714 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5715 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5716 Split = true;
5717 }
5718
5719 MVT EltVT = ConstVecVT.getVectorElementType();
5720 for (unsigned i = 0; i < NumElts; ++i) {
5721 bool IsUndef = Values[i] < 0 && IsMask;
5722 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5723 DAG.getConstant(Values[i], dl, EltVT);
5724 Ops.push_back(OpNode);
5725 if (Split)
5726 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5727 DAG.getConstant(0, dl, EltVT));
5728 }
5729 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5730 if (Split)
5731 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5732 return ConstsNode;
5733}
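
For example (a sketch with hypothetical inputs), building a v2i64 constant vector on a 32-bit target where i64 is not legal goes through the Split path:

  // Values = {3, 7}, VT = MVT::v2i64, i64 not legal:
  //   ConstVecVT becomes MVT::v4i32, the build vector is {3, 0, 7, 0},
  //   and the result is bitcast back to v2i64.
  // With IsMask = true, a negative value such as -1 becomes UNDEF lanes instead.
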
5734
5735static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5736 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5737 assert(Bits.size() == Undefs.getBitWidth() &&
5738 "Unequal constant and undef arrays");
5739 SmallVector<SDValue, 32> Ops;
5740 bool Split = false;
5741
5742 MVT ConstVecVT = VT;
5743 unsigned NumElts = VT.getVectorNumElements();
5744 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5745 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5746 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5747 Split = true;
5748 }
5749
5750 MVT EltVT = ConstVecVT.getVectorElementType();
5751 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5752 if (Undefs[i]) {
5753 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5754 continue;
5755 }
5756 const APInt &V = Bits[i];
5757 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
5758 if (Split) {
5759 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5760 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5761 } else if (EltVT == MVT::f32) {
5762 APFloat FV(APFloat::IEEEsingle(), V);
5763 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5764 } else if (EltVT == MVT::f64) {
5765 APFloat FV(APFloat::IEEEdouble(), V);
5766 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5767 } else {
5768 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5769 }
5770 }
5771
5772 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5773 return DAG.getBitcast(VT, ConstsNode);
5774}
5775
5776/// Returns a vector of specified type with all zero elements.
5777static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5778 SelectionDAG &DAG, const SDLoc &dl) {
5779 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
5780 VT.getVectorElementType() == MVT::i1) &&
5781 "Unexpected vector type");
5782
5783 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5784 // type. This ensures they get CSE'd. But if the integer type is not
5785 // available, use a floating-point +0.0 instead.
5786 SDValue Vec;
5787 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5788 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5789 } else if (VT.isFloatingPoint()) {
5790 Vec = DAG.getConstantFP(+0.0, dl, VT);
5791 } else if (VT.getVectorElementType() == MVT::i1) {
5792 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
5793 "Unexpected vector type");
5794 Vec = DAG.getConstant(0, dl, VT);
5795 } else {
5796 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5797 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5798 }
5799 return DAG.getBitcast(VT, Vec);
5800}
5801
5802static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5803 const SDLoc &dl, unsigned vectorWidth) {
5804 EVT VT = Vec.getValueType();
5805 EVT ElVT = VT.getVectorElementType();
5806 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5807 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5808 VT.getVectorNumElements()/Factor);
5809
5810 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5811 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5812 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5813
5814 // This is the index of the first element of the vectorWidth-bit chunk
5815 // we want. Since ElemsPerChunk is a power of 2, we just need to clear bits.
5816 IdxVal &= ~(ElemsPerChunk - 1);
5817
5818 // If the input is a buildvector just emit a smaller one.
5819 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5820 return DAG.getBuildVector(ResultVT, dl,
5821 Vec->ops().slice(IdxVal, ElemsPerChunk));
5822
5823 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5824 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5825}
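
A small worked example of the chunk-index rounding above (values are illustrative):

  // Vec = v8f32 (256 bits), vectorWidth = 128:
  //   Factor = 2, ResultVT = v4f32, ElemsPerChunk = 4
  //   IdxVal = 5 -> IdxVal &= ~3 -> 4, so the extract covers elements 4..7
  //   IdxVal = 2 -> rounds down to 0, covering elements 0..3
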
5826
5827/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5828/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5829/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5830/// instructions or a simple subregister reference. Idx is an index in the
5831/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5832/// lowering EXTRACT_VECTOR_ELT operations easier.
5833static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5834 SelectionDAG &DAG, const SDLoc &dl) {
5835 assert((Vec.getValueType().is256BitVector() ||
5836 Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
5837 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5838}
5839
5840/// Generate a DAG to grab 256-bits from a 512-bit vector.
5841static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5842 SelectionDAG &DAG, const SDLoc &dl) {
5843 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
5844 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5845}
5846
5847static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5848 SelectionDAG &DAG, const SDLoc &dl,
5849 unsigned vectorWidth) {
5850 assert((vectorWidth == 128 || vectorWidth == 256) &&
5851 "Unsupported vector width");
5852 // Inserting an UNDEF subvector leaves Result unchanged.
5853 if (Vec.isUndef())
5854 return Result;
5855 EVT VT = Vec.getValueType();
5856 EVT ElVT = VT.getVectorElementType();
5857 EVT ResultVT = Result.getValueType();
5858
5859 // Insert the relevant vectorWidth bits.
5860 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5861 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5862
5863 // This is the index of the first element of the vectorWidth-bit chunk
5864 // we want. Since ElemsPerChunk is a power of 2, we just need to clear bits.
5865 IdxVal &= ~(ElemsPerChunk - 1);
5866
5867 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5868 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5869}
5870
5871/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5872/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5873/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5874/// simple superregister reference. Idx is an index in the 128 bits
5875/// we want. It need not be aligned to a 128-bit boundary. That makes
5876/// lowering INSERT_VECTOR_ELT operations easier.
5877static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5878 SelectionDAG &DAG, const SDLoc &dl) {
5879 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
5880 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5881}
5882
5883/// Widen a vector to a larger size with the same scalar type, with the new
5884/// elements either zero or undef.
5885static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5886 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5887 const SDLoc &dl) {
5888 assert(Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits() &&
5889 Vec.getValueType().getScalarType() == VT.getScalarType() &&
5890 "Unsupported vector widening type");
5891 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5892 : DAG.getUNDEF(VT);
5893 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5894 DAG.getIntPtrConstant(0, dl));
5895}
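
For instance (sketch only), widening a v4i32 value to v8i32 with ZeroNewElements set builds:

  // widenSubVector(MVT::v8i32, Vec /*v4i32*/, /*ZeroNewElements=*/true, Subtarget, DAG, dl)
  //   -> (insert_subvector (zero v8i32), Vec, 0)
  // With ZeroNewElements == false the base vector is UNDEF instead of zero.
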
5896
5897/// Widen a vector to a larger size with the same scalar type, with the new
5898/// elements either zero or undef.
5899static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
5900 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5901 const SDLoc &dl, unsigned WideSizeInBits) {
5902 assert(Vec.getValueSizeInBits() < WideSizeInBits &&
5903 (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&