Bug Summary

File: lib/Target/X86/X86ISelLowering.cpp
Warning: line 1045, column 10
Called C++ object pointer is null

Annotated Source Code

/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86ISelLowering.h"
16#include "Utils/X86ShuffleDecode.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86ShuffleDecodeConstantPool.h"
23#include "X86TargetMachine.h"
24#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallBitVector.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/CodeGen/IntrinsicLowering.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineJumpTableInfo.h"
36#include "llvm/CodeGen/MachineModuleInfo.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/WinEHFuncInfo.h"
39#include "llvm/IR/CallSite.h"
40#include "llvm/IR/CallingConv.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/DiagnosticInfo.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalAlias.h"
46#include "llvm/IR/GlobalVariable.h"
47#include "llvm/IR/Instructions.h"
48#include "llvm/IR/Intrinsics.h"
49#include "llvm/MC/MCAsmInfo.h"
50#include "llvm/MC/MCContext.h"
51#include "llvm/MC/MCExpr.h"
52#include "llvm/MC/MCSymbol.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Debug.h"
55#include "llvm/Support/ErrorHandling.h"
56#include "llvm/Support/KnownBits.h"
57#include "llvm/Support/MathExtras.h"
58#include "llvm/Target/TargetLowering.h"
59#include "llvm/Target/TargetOptions.h"
60#include <algorithm>
61#include <bitset>
62#include <cctype>
63#include <numeric>
64using namespace llvm;
65
66#define DEBUG_TYPE "x86-isel"
67
68STATISTIC(NumTailCalls, "Number of tail calls"); // expands to: static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls", "Number of tail calls", {0}, false};
69
70static cl::opt<bool> ExperimentalVectorWideningLegalization( // hidden command-line flag, off by default
71 "x86-experimental-vector-widening-legalization", cl::init(false),
72 cl::desc("Enable an experimental vector type legalization through widening "
73 "rather than promotion."),
74 cl::Hidden);
75
76static cl::opt<int> ExperimentalPrefLoopAlignment( // hidden integer option, default 4; per its description, the count of low loop-header PC bits that will be 0
77 "x86-experimental-pref-loop-alignment", cl::init(4),
78 cl::desc("Sets the preferable loop alignment for experiments "
79 "(the last x86-experimental-pref-loop-alignment bits"
80 " of the loop header PC will be 0)."),
81 cl::Hidden);
82
83static cl::opt<bool> MulConstantOptimization( // hidden command-line flag, on by default
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden);
88
89/// Call this when the user attempts to do something unsupported, like
90/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
91/// report_fatal_error, so calling code should attempt to recover without
92/// crashing. Emits a DiagnosticInfoUnsupported carrying Msg via DAG's context.
93static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
94 const char *Msg) {
95 MachineFunction &MF = DAG.getMachineFunction();
96 DAG.getContext()->diagnose( // reported as a diagnostic on the DAG's context
97 DiagnosticInfoUnsupported(*MF.getFunction(), Msg, dl.getDebugLoc())); // tagged with MF's function and dl's debug location
98}
99
100X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
101 const X86Subtarget &STI)
102 : TargetLowering(TM), Subtarget(STI) {
103 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
104 X86ScalarSSEf64 = Subtarget.hasSSE2();
105 X86ScalarSSEf32 = Subtarget.hasSSE1();
106 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
107
108 // Set up the TargetLowering object.
109
110 // X86 is weird. It always uses i8 for shift amounts and setcc results.
111 setBooleanContents(ZeroOrOneBooleanContent);
112 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
113 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
114
115 // For 64-bit, since we have so many registers, use the ILP scheduler.
116 // For 32-bit, use the register pressure specific scheduling.
117 // For Atom, always use ILP scheduling.
118 if (Subtarget.isAtom())
119 setSchedulingPreference(Sched::ILP);
120 else if (Subtarget.is64Bit())
121 setSchedulingPreference(Sched::ILP);
122 else
123 setSchedulingPreference(Sched::RegPressure);
124 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
125 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
126
127 // Bypass expensive divides and use cheaper ones.
128 if (TM.getOptLevel() >= CodeGenOpt::Default) {
129 if (Subtarget.hasSlowDivide32())
130 addBypassSlowDiv(32, 8);
131 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
132 addBypassSlowDiv(64, 32);
133 }
134
135 if (Subtarget.isTargetKnownWindowsMSVC() ||
136 Subtarget.isTargetWindowsItanium()) {
137 // Setup Windows compiler runtime calls.
138 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
139 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
140 setLibcallName(RTLIB::SREM_I64, "_allrem");
141 setLibcallName(RTLIB::UREM_I64, "_aullrem");
142 setLibcallName(RTLIB::MUL_I64, "_allmul");
143 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
144 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
145 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
146 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
147 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
148 }
149
150 if (Subtarget.isTargetDarwin()) {
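151 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. NOTE(review): both calls below pass false, which reads as the opposite of this comment -- confirm intent.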
152 setUseUnderscoreSetJmp(false);
153 setUseUnderscoreLongJmp(false);
154 } else if (Subtarget.isTargetWindowsGNU()) {
155 // MS runtime is weird: it exports _setjmp, but longjmp!
156 setUseUnderscoreSetJmp(true);
157 setUseUnderscoreLongJmp(false);
158 } else {
159 setUseUnderscoreSetJmp(true);
160 setUseUnderscoreLongJmp(true);
161 }
162
163 // Set up the register classes.
164 addRegisterClass(MVT::i8, &X86::GR8RegClass);
165 addRegisterClass(MVT::i16, &X86::GR16RegClass);
166 addRegisterClass(MVT::i32, &X86::GR32RegClass);
167 if (Subtarget.is64Bit())
168 addRegisterClass(MVT::i64, &X86::GR64RegClass);
169
170 for (MVT VT : MVT::integer_valuetypes())
171 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
172
173 // We don't accept any truncstore of integer registers.
174 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
175 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
176 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
177 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
178 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
179 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
180
181 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
182
183 // SETOEQ and SETUNE require checking two conditions.
184 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
185 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
186 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
187 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
188 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
189 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
190
191 // Integer absolute.
192 if (Subtarget.hasCMov()) {
193 setOperationAction(ISD::ABS , MVT::i16 , Custom);
194 setOperationAction(ISD::ABS , MVT::i32 , Custom);
195 if (Subtarget.is64Bit())
196 setOperationAction(ISD::ABS , MVT::i64 , Custom);
197 }
198
199 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
200 // operation.
201 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
202 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
203 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
204
205 if (Subtarget.is64Bit()) {
206 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
207 // f32/f64 are legal, f80 is custom.
208 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
209 else
210 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
211 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
212 } else if (!Subtarget.useSoftFloat()) {
213 // We have an algorithm for SSE2->double, and we turn this into a
214 // 64-bit FILD followed by conditional FADD for other targets.
215 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
216 // We have an algorithm for SSE2, and we turn this into a 64-bit
217 // FILD or VCVTUSI2SS/SD for other targets.
218 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
219 }
220
221 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
222 // this operation.
223 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
224 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
225
226 if (!Subtarget.useSoftFloat()) {
227 // SSE has no i16 to fp conversion, only i32.
228 if (X86ScalarSSEf32) {
229 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
230 // f32 and f64 cases are Legal, f80 case is not
231 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
232 } else {
233 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
234 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
235 }
236 } else {
237 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
238 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
239 }
240
241 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
242 // this operation.
243 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
244 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
245
246 if (!Subtarget.useSoftFloat()) {
247 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
248 // are Legal, f80 is custom lowered.
249 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
250 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
251
252 if (X86ScalarSSEf32) {
253 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
254 // f32 and f64 cases are Legal, f80 case is not
255 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
256 } else {
257 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
258 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
259 }
260 } else {
261 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
262 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
263 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
264 }
265
266 // Handle FP_TO_UINT by promoting the destination to a larger signed
267 // conversion.
268 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
269 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
270 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
271
272 if (Subtarget.is64Bit()) {
273 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
274 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
275 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
276 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
277 } else {
278 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
279 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
280 }
281 } else if (!Subtarget.useSoftFloat()) {
282 // Since AVX is a superset of SSE3, only check for SSE here.
283 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
284 // Expand FP_TO_UINT into a select.
285 // FIXME: We would like to use a Custom expander here eventually to do
286 // the optimal thing for SSE vs. the default expansion in the legalizer.
287 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
288 else
289 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
290 // With SSE3 we can use fisttpll to convert to a signed i64; without
291 // SSE, we're stuck with a fistpll.
292 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
293
294 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
295 }
296
297 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
298 if (!X86ScalarSSEf64) {
299 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
300 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
301 if (Subtarget.is64Bit()) {
302 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
303 // Without SSE, i64->f64 goes through memory.
304 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
305 }
306 } else if (!Subtarget.is64Bit())
307 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
308
309 // Scalar integer divide and remainder are lowered to use operations that
310 // produce two results, to match the available instructions. This exposes
311 // the two-result form to trivial CSE, which is able to combine x/y and x%y
312 // into a single instruction.
313 //
314 // Scalar integer multiply-high is also lowered to use two-result
315 // operations, to match the available instructions. However, plain multiply
316 // (low) operations are left as Legal, as there are single-result
317 // instructions for this in x86. Using the two-result multiply instructions
318 // when both high and low results are needed must be arranged by dagcombine.
319 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
320 setOperationAction(ISD::MULHS, VT, Expand);
321 setOperationAction(ISD::MULHU, VT, Expand);
322 setOperationAction(ISD::SDIV, VT, Expand);
323 setOperationAction(ISD::UDIV, VT, Expand);
324 setOperationAction(ISD::SREM, VT, Expand);
325 setOperationAction(ISD::UREM, VT, Expand);
326 }
327
328 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
329 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
330 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
331 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
332 setOperationAction(ISD::BR_CC, VT, Expand);
333 setOperationAction(ISD::SELECT_CC, VT, Expand);
334 }
335 if (Subtarget.is64Bit())
336 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
337 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
338 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
339 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
340 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
341
342 setOperationAction(ISD::FREM , MVT::f32 , Expand);
343 setOperationAction(ISD::FREM , MVT::f64 , Expand);
344 setOperationAction(ISD::FREM , MVT::f80 , Expand);
345 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
346
347 // Promote the i8 variants and force them on up to i32 which has a shorter
348 // encoding.
349 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
350 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
351 if (!Subtarget.hasBMI()) {
352 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
353 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
354 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
355 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
356 if (Subtarget.is64Bit()) {
357 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
358 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
359 }
360 }
361
362 if (Subtarget.hasLZCNT()) {
363 // When promoting the i8 variants, force them to i32 for a shorter
364 // encoding.
365 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
366 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
367 } else {
368 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
369 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
370 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
371 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
372 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
373 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
374 if (Subtarget.is64Bit()) {
375 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
376 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
377 }
378 }
379
380 // Special handling for half-precision floating point conversions.
381 // If we don't have F16C support, then lower half float conversions
382 // into library calls.
383 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
384 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
385 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
386 }
387
388 // There's never any support for operations beyond MVT::f32.
389 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
390 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
391 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
392 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
393
394 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
395 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
396 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
397 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
398 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
399 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
400
401 if (Subtarget.hasPOPCNT()) {
402 setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
403 } else {
404 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
405 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
406 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
407 if (Subtarget.is64Bit())
408 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
409 }
410
411 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
412
413 if (!Subtarget.hasMOVBE())
414 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
415
416 // These should be promoted to a larger select which is supported.
417 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
418 // X86 wants to expand cmov itself.
419 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
420 setOperationAction(ISD::SELECT, VT, Custom);
421 setOperationAction(ISD::SETCC, VT, Custom);
422 }
423 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
424 if (VT == MVT::i64 && !Subtarget.is64Bit())
425 continue;
426 setOperationAction(ISD::SELECT, VT, Custom);
427 setOperationAction(ISD::SETCC, VT, Custom);
428 }
429
430 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
431 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
432 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
433
434 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
435 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
436 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
437 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
438 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
439 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
440 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
441 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
442
443 // Darwin ABI issue.
444 for (auto VT : { MVT::i32, MVT::i64 }) {
445 if (VT == MVT::i64 && !Subtarget.is64Bit())
446 continue;
447 setOperationAction(ISD::ConstantPool , VT, Custom);
448 setOperationAction(ISD::JumpTable , VT, Custom);
449 setOperationAction(ISD::GlobalAddress , VT, Custom);
450 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
451 setOperationAction(ISD::ExternalSymbol , VT, Custom);
452 setOperationAction(ISD::BlockAddress , VT, Custom);
453 }
454
455 // 64-bit shl, sra, srl (iff 32-bit x86)
456 for (auto VT : { MVT::i32, MVT::i64 }) {
457 if (VT == MVT::i64 && !Subtarget.is64Bit())
458 continue;
459 setOperationAction(ISD::SHL_PARTS, VT, Custom);
460 setOperationAction(ISD::SRA_PARTS, VT, Custom);
461 setOperationAction(ISD::SRL_PARTS, VT, Custom);
462 }
463
464 if (Subtarget.hasSSE1())
465 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
466
467 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
468
469 // Expand certain atomics
470 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
471 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
472 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
473 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
474 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
477 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
478 }
479
480 if (Subtarget.hasCmpxchg16b()) {
481 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
482 }
483
484 // FIXME - use subtarget debug flags
485 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
486 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
487 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
488 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
489 }
490
491 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
492 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
493
494 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
495 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
496
497 setOperationAction(ISD::TRAP, MVT::Other, Legal);
498 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
499
500 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
501 setOperationAction(ISD::VASTART , MVT::Other, Custom);
502 setOperationAction(ISD::VAEND , MVT::Other, Expand);
503 bool Is64Bit = Subtarget.is64Bit();
504 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
505 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
506
507 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
508 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
509
510 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
511
512 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
513 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
514 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
515
516 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
517 // f32 and f64 use SSE.
518 // Set up the FP register classes.
519 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
520 : &X86::FR32RegClass);
521 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
522 : &X86::FR64RegClass);
523
524 for (auto VT : { MVT::f32, MVT::f64 }) {
525 // Use ANDPD to simulate FABS.
526 setOperationAction(ISD::FABS, VT, Custom);
527
528 // Use XORP to simulate FNEG.
529 setOperationAction(ISD::FNEG, VT, Custom);
530
531 // Use ANDPD and ORPD to simulate FCOPYSIGN.
532 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
533
534 // We don't support sin/cos/fmod
535 setOperationAction(ISD::FSIN , VT, Expand);
536 setOperationAction(ISD::FCOS , VT, Expand);
537 setOperationAction(ISD::FSINCOS, VT, Expand);
538 }
539
540 // Lower this to MOVMSK plus an AND.
541 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
542 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
543
544 // Expand FP immediates into loads from the stack, except for the special
545 // cases we handle.
546 addLegalFPImmediate(APFloat(+0.0)); // xorpd
547 addLegalFPImmediate(APFloat(+0.0f)); // xorps
548 } else if (UseX87 && X86ScalarSSEf32) {
549 // Use SSE for f32, x87 for f64.
550 // Set up the FP register classes.
551 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
552 : &X86::FR32RegClass);
553 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
554
555 // Use ANDPS to simulate FABS.
556 setOperationAction(ISD::FABS , MVT::f32, Custom);
557
558 // Use XORP to simulate FNEG.
559 setOperationAction(ISD::FNEG , MVT::f32, Custom);
560
561 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
562
563 // Use ANDPS and ORPS to simulate FCOPYSIGN.
564 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
565 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
566
567 // We don't support sin/cos/fmod
568 setOperationAction(ISD::FSIN , MVT::f32, Expand);
569 setOperationAction(ISD::FCOS , MVT::f32, Expand);
570 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
571
572 // Special cases we handle for FP constants.
573 addLegalFPImmediate(APFloat(+0.0f)); // xorps
574 addLegalFPImmediate(APFloat(+0.0)); // FLD0
575 addLegalFPImmediate(APFloat(+1.0)); // FLD1
576 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
577 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
578
579 // Always expand sin/cos functions even though x87 has an instruction.
580 setOperationAction(ISD::FSIN , MVT::f64, Expand);
581 setOperationAction(ISD::FCOS , MVT::f64, Expand);
582 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
583 } else if (UseX87) {
584 // f32 and f64 in x87.
585 // Set up the FP register classes.
586 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
587 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
588
589 for (auto VT : { MVT::f32, MVT::f64 }) {
590 setOperationAction(ISD::UNDEF, VT, Expand);
591 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
592
593 // Always expand sin/cos functions even though x87 has an instruction.
594 setOperationAction(ISD::FSIN , VT, Expand);
595 setOperationAction(ISD::FCOS , VT, Expand);
596 setOperationAction(ISD::FSINCOS, VT, Expand);
597 }
598 addLegalFPImmediate(APFloat(+0.0)); // FLD0
599 addLegalFPImmediate(APFloat(+1.0)); // FLD1
600 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
601 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
602 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
603 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
604 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
605 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
606 }
607
608 // We don't support FMA.
609 setOperationAction(ISD::FMA, MVT::f64, Expand);
610 setOperationAction(ISD::FMA, MVT::f32, Expand);
611
612 // Long double always uses X87, except f128 in MMX.
613 if (UseX87) {
614 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
615 addRegisterClass(MVT::f128, &X86::FR128RegClass);
616 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
617 setOperationAction(ISD::FABS , MVT::f128, Custom);
618 setOperationAction(ISD::FNEG , MVT::f128, Custom);
619 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
620 }
621
622 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
623 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
624 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
625 {
626 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
627 addLegalFPImmediate(TmpFlt); // FLD0
628 TmpFlt.changeSign();
629 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
630
631 bool ignored;
632 APFloat TmpFlt2(+1.0);
633 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
634 &ignored);
635 addLegalFPImmediate(TmpFlt2); // FLD1
636 TmpFlt2.changeSign();
637 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
638 }
639
640 // Always expand sin/cos functions even though x87 has an instruction.
641 setOperationAction(ISD::FSIN , MVT::f80, Expand);
642 setOperationAction(ISD::FCOS , MVT::f80, Expand);
643 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
644
645 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
646 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
647 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
648 setOperationAction(ISD::FRINT, MVT::f80, Expand);
649 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
650 setOperationAction(ISD::FMA, MVT::f80, Expand);
651 }
652
653 // Always use a library call for pow.
654 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
655 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
656 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
657
658 setOperationAction(ISD::FLOG, MVT::f80, Expand);
659 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
660 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
661 setOperationAction(ISD::FEXP, MVT::f80, Expand);
662 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
663 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
664 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
665
666 // Some FP actions are always expanded for vector types.
667 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
668 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
669 setOperationAction(ISD::FSIN, VT, Expand);
670 setOperationAction(ISD::FSINCOS, VT, Expand);
671 setOperationAction(ISD::FCOS, VT, Expand);
672 setOperationAction(ISD::FREM, VT, Expand);
673 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
674 setOperationAction(ISD::FPOW, VT, Expand);
675 setOperationAction(ISD::FLOG, VT, Expand);
676 setOperationAction(ISD::FLOG2, VT, Expand);
677 setOperationAction(ISD::FLOG10, VT, Expand);
678 setOperationAction(ISD::FEXP, VT, Expand);
679 setOperationAction(ISD::FEXP2, VT, Expand);
680 }
681
682 // First set operation action for all vector types to either promote
683 // (for widening) or expand (for scalarization). Then we will selectively
684 // turn on ones that can be effectively codegen'd.
685 for (MVT VT : MVT::vector_valuetypes()) {
686 setOperationAction(ISD::SDIV, VT, Expand);
687 setOperationAction(ISD::UDIV, VT, Expand);
688 setOperationAction(ISD::SREM, VT, Expand);
689 setOperationAction(ISD::UREM, VT, Expand);
690 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
691 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
692 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
693 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
694 setOperationAction(ISD::FMA, VT, Expand);
695 setOperationAction(ISD::FFLOOR, VT, Expand);
696 setOperationAction(ISD::FCEIL, VT, Expand);
697 setOperationAction(ISD::FTRUNC, VT, Expand);
698 setOperationAction(ISD::FRINT, VT, Expand);
699 setOperationAction(ISD::FNEARBYINT, VT, Expand);
700 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
701 setOperationAction(ISD::MULHS, VT, Expand);
702 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
703 setOperationAction(ISD::MULHU, VT, Expand);
704 setOperationAction(ISD::SDIVREM, VT, Expand);
705 setOperationAction(ISD::UDIVREM, VT, Expand);
706 setOperationAction(ISD::CTPOP, VT, Expand);
707 setOperationAction(ISD::CTTZ, VT, Expand);
708 setOperationAction(ISD::CTLZ, VT, Expand);
709 setOperationAction(ISD::ROTL, VT, Expand);
710 setOperationAction(ISD::ROTR, VT, Expand);
711 setOperationAction(ISD::BSWAP, VT, Expand);
712 setOperationAction(ISD::SETCC, VT, Expand);
713 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
714 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
715 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
716 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
717 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
718 setOperationAction(ISD::TRUNCATE, VT, Expand);
719 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
720 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
721 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
722 setOperationAction(ISD::SELECT_CC, VT, Expand);
723 for (MVT InnerVT : MVT::vector_valuetypes()) {
724 setTruncStoreAction(InnerVT, VT, Expand);
725
726 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
727 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
728
729 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
730 // types, we have to deal with them whether we ask for Expansion or not.
731 // Setting Expand causes its own optimisation problems though, so leave
732 // them legal.
733 if (VT.getVectorElementType() == MVT::i1)
734 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
735
736 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
737 // split/scalarized right now.
738 if (VT.getVectorElementType() == MVT::f16)
739 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
740 }
741 }
742
743 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
744 // with -msoft-float, disable use of MMX as well.
745 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
746 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
747 // No operations on x86mmx supported, everything uses intrinsics.
748 }
749
750 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
751 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
752 : &X86::VR128RegClass);
753
754 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
755 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
756 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
757 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
758 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
759 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
760 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
761 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
762 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
763 }
764
765 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
766 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
767 : &X86::VR128RegClass);
768
769 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
770 // registers cannot be used even for integer operations.
771 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
772 : &X86::VR128RegClass);
773 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
774 : &X86::VR128RegClass);
775 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
776 : &X86::VR128RegClass);
777 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
778 : &X86::VR128RegClass);
779
780 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
781 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
782 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
783 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
784 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
785 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
786 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
787 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
788 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
789 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
790 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
791 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
792 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
793
794 setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
795 setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
796 setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
797 setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
798
799 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
800 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
801 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
802
803 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
804 setOperationAction(ISD::SETCC, VT, Custom);
805 setOperationAction(ISD::CTPOP, VT, Custom);
806 setOperationAction(ISD::CTTZ, VT, Custom);
807 }
808
809 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
810 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
811 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
812 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
813 setOperationAction(ISD::VSELECT, VT, Custom);
814 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
815 }
816
817 // We support custom legalizing of sext and anyext loads for specific
818 // memory vector types which we can load as a scalar (or sequence of
819 // scalars) and extend in-register to a legal 128-bit vector type. For sext
820 // loads these must work with a single scalar load.
821 for (MVT VT : MVT::integer_vector_valuetypes()) {
822 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
823 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
824 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
825 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
826 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
827 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
828 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
829 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
830 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
831 }
832
833 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
834 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
835 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
836 setOperationAction(ISD::VSELECT, VT, Custom);
837
838 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
839 continue;
840
841 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
842 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
843 }
844
845 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
846 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
847 setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
848 setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
849 setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
850 setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
851 setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
852 }
853
854 // Custom lower v2i64 and v2f64 selects.
855 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
856 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
857
858 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
859 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
860
861 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
862 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
863
864 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
865 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
866 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
867
868 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
869 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
870
871 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
872 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
873
874 for (MVT VT : MVT::fp_vector_valuetypes())
875 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
876
877 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
878 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
879 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
880
881 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
882 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
883 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
884
885 // In the customized shift lowering, the legal v4i32/v2i64 cases
886 // in AVX2 will be recognized.
887 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
888 setOperationAction(ISD::SRL, VT, Custom);
889 setOperationAction(ISD::SHL, VT, Custom);
890 setOperationAction(ISD::SRA, VT, Custom);
891 }
892 }
893
894 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
895 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
896 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
897 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
898 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
899 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
900 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
901 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
902 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
903 }
904
905 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
906 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
907 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
908 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
909 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
910 setOperationAction(ISD::FRINT, RoundedTy, Legal);
911 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
912 }
913
914 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
915 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
916 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
917 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
918 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
919 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
920 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
921 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
922
923 // FIXME: Do we need to handle scalar-to-vector here?
924 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
925
926 // We directly match byte blends in the backend as they match the VSELECT
927 // condition form.
928 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
929
930 // SSE41 brings specific instructions for doing vector sign extend even in
931 // cases where we don't have SRA.
932 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
933 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
934 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
935 }
936
937 for (MVT VT : MVT::integer_vector_valuetypes()) {
938 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
939 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
940 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
941 }
942
943 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
944 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
945 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
946 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
947 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
948 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
949 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
950 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
951 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
952 }
953
954 // i8 vectors are custom because the source register and source
955 // memory operand types are not the same width.
956 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
957 }
958
959 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
960 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
961 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
962 setOperationAction(ISD::ROTL, VT, Custom);
963
964 // XOP can efficiently perform BITREVERSE with VPPERM.
965 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
966 setOperationAction(ISD::BITREVERSE, VT, Custom);
967
968 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
969 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
970 setOperationAction(ISD::BITREVERSE, VT, Custom);
971 }
972
973 if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
974 bool HasInt256 = Subtarget.hasInt256();
975
976 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
977 : &X86::VR256RegClass);
978 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
979 : &X86::VR256RegClass);
980 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
981 : &X86::VR256RegClass);
982 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
983 : &X86::VR256RegClass);
984 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
985 : &X86::VR256RegClass);
986 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
987 : &X86::VR256RegClass);
988
989 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
990 setOperationAction(ISD::FFLOOR, VT, Legal);
991 setOperationAction(ISD::FCEIL, VT, Legal);
992 setOperationAction(ISD::FTRUNC, VT, Legal);
993 setOperationAction(ISD::FRINT, VT, Legal);
994 setOperationAction(ISD::FNEARBYINT, VT, Legal);
995 setOperationAction(ISD::FNEG, VT, Custom);
996 setOperationAction(ISD::FABS, VT, Custom);
997 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
998 }
999
1000 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1001 // even though v8i16 is a legal type.
1002 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
1003 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
1004 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1005
1006 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
1007 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1008 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1009
1010 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1011 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1012
1013 for (MVT VT : MVT::fp_vector_valuetypes())
1014 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1015
1016 // In the customized shift lowering, the legal v8i32/v4i64 cases
1017 // in AVX2 will be recognized.
1018 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1019 setOperationAction(ISD::SRL, VT, Custom);
1020 setOperationAction(ISD::SHL, VT, Custom);
1021 setOperationAction(ISD::SRA, VT, Custom);
1022 }
1023
1024 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1025 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1026 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1027
1028 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1029 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1030 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1031 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1032 }
1033
1034 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1035 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1036 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1037 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1038
1039 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1040 setOperationAction(ISD::SETCC, VT, Custom);
1041 setOperationAction(ISD::CTPOP, VT, Custom);
1042 setOperationAction(ISD::CTTZ, VT, Custom);
1043 setOperationAction(ISD::CTLZ, VT, Custom);
1044 }
1045
1046 if (Subtarget.hasAnyFMA()) {
1047 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1048 MVT::v2f64, MVT::v4f64 })
1049 setOperationAction(ISD::FMA, VT, Legal);
1050 }
1051
1052 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1053 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1054 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1055 }
1056
1057 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1058 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1059 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1060 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1061
1062 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1063 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1064
1065 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1066 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1067 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1068 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1069
1070 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1071 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1072 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1073 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1074 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1075 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1076 }
1077
1078 if (HasInt256) {
1079 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
1080 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
1081 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
1082
1083 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1084 // when we have a 256bit-wide blend with immediate.
1085 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1086
1087 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1088 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1089 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1090 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1091 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1092 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1093 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1094 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1095 }
1096 }
1097
1098 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1099 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1100 setOperationAction(ISD::MLOAD, VT, Legal);
1101 setOperationAction(ISD::MSTORE, VT, Legal);
1102 }
1103
1104 // Extract subvector is special because the value type
1105 // (result) is 128-bit but the source is 256-bit wide.
1106 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1107 MVT::v4f32, MVT::v2f64 }) {
1108 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1109 }
1110
1111 // Custom lower several nodes for 256-bit types.
1112 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1113 MVT::v8f32, MVT::v4f64 }) {
1114 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1115 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1116 setOperationAction(ISD::VSELECT, VT, Custom);
1117 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1118 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1119 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1120 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1121 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1122 }
1123
1124 if (HasInt256)
1125 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1126
1127 // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
1128 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1129 setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
1130 setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
1131 setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
1132 setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
1133 setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
1134 }
1135 }
1136
1137 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1138 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1139 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1140 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1141 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1142
1143 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1144 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1145 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1146
1147 for (MVT VT : MVT::fp_vector_valuetypes())
1148 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1149
1150 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1151 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1152 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1153 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1154 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1155 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1156 }
1157
1158 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
1159 MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
1160 MVT::v8i64, MVT::v32i16, MVT::v64i8}) {
1161 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
1162 setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
1163 setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
1164 setLoadExtAction(ISD::EXTLOAD, VT, MaskVT, Custom);
1165 setTruncStoreAction(VT, MaskVT, Custom);
1166 }
1167
1168 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1169 setOperationAction(ISD::FNEG, VT, Custom);
1170 setOperationAction(ISD::FABS, VT, Custom);
1171 setOperationAction(ISD::FMA, VT, Legal);
1172 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1173 }
1174
1175 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1176 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1177 setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Legal);
1178 setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Legal);
1179 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1180 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1181 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1182 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1183 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1184 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1185 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1186 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1187 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1188 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1189 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1190 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
1191 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
1192 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1193 setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
1194 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1195 setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
1196 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
1197 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
1198 setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
1199 setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
1200 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1201 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1202
1203 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1204 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1205 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1206 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1207 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1208 if (Subtarget.hasVLX()){
1209 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1210 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1211 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1212 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1213 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1214
1215 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1216 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1217 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1218 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1219 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1220 } else {
1221 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1222 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1223 setOperationAction(ISD::MLOAD, VT, Custom);
1224 setOperationAction(ISD::MSTORE, VT, Custom);
1225 }
1226 }
1227
1228 if (Subtarget.hasDQI()) {
1229 for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) {
1230 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1231 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1232 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1233 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1234 }
1235 if (Subtarget.hasVLX()) {
1236 // Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
1237 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1238 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1239 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1240 }
1241 }
1242 if (Subtarget.hasVLX()) {
1243 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1244 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1245 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1246 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1247 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1248 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1249 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1250 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
1251 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
1252 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1253 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1254 }
1255
1256 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1257 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1258 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1259 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1260 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1261 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1262 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1263 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1264 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1265 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1266 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1267
1268 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1269 setOperationAction(ISD::FFLOOR, VT, Legal);
1270 setOperationAction(ISD::FCEIL, VT, Legal);
1271 setOperationAction(ISD::FTRUNC, VT, Legal);
1272 setOperationAction(ISD::FRINT, VT, Legal);
1273 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1274 }
1275
1276 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
1277 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
1278
1279 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1280 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1281 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1282
1283 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1284 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1285 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1286 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1287 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1288
1289 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1290 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1291
1292 setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom);
1293 setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom);
1294
1295 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1296 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1297 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1298 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1299 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1300 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1301
1302
1303 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1304 setOperationAction(ISD::ABS, MVT::v4i64, Legal);
1305 setOperationAction(ISD::ABS, MVT::v2i64, Legal);
1306
1307 for (auto VT : { MVT::v8i1, MVT::v16i1 }) {
1308 setOperationAction(ISD::ADD, VT, Custom);
1309 setOperationAction(ISD::SUB, VT, Custom);
1310 setOperationAction(ISD::MUL, VT, Custom);
1311 setOperationAction(ISD::SETCC, VT, Custom);
1312 setOperationAction(ISD::SELECT, VT, Custom);
1313 setOperationAction(ISD::TRUNCATE, VT, Custom);
1314
1315 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1316 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1317 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1318 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1319 setOperationAction(ISD::VSELECT, VT, Expand);
1320 }
1321
1322 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1323 setOperationAction(ISD::SMAX, VT, Legal);
1324 setOperationAction(ISD::UMAX, VT, Legal);
1325 setOperationAction(ISD::SMIN, VT, Legal);
1326 setOperationAction(ISD::UMIN, VT, Legal);
1327 setOperationAction(ISD::ABS, VT, Legal);
1328 setOperationAction(ISD::SRL, VT, Custom);
1329 setOperationAction(ISD::SHL, VT, Custom);
1330 setOperationAction(ISD::SRA, VT, Custom);
1331 setOperationAction(ISD::CTPOP, VT, Custom);
1332 setOperationAction(ISD::CTTZ, VT, Custom);
1333 }
1334
1335 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1336 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64,
1337 MVT::v8i64}) {
1338 setOperationAction(ISD::ROTL, VT, Custom);
1339 setOperationAction(ISD::ROTR, VT, Custom);
1340 }
1341
1342 // Need to promote to 64-bit even though we have 32-bit masked instructions
1343 // because the IR optimizers rearrange bitcasts around logic ops leaving
1344 // too many variations to handle if we don't promote them.
1345 setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
1346 setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
1347 setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
1348
1349 if (Subtarget.hasCDI()) {
1350 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1351 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
1352 MVT::v4i64, MVT::v8i64}) {
1353 setOperationAction(ISD::CTLZ, VT, Legal);
1354 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
1355 }
1356 } // Subtarget.hasCDI()
1357
1358 if (Subtarget.hasDQI()) {
1359 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1360 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1361 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1362 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1363 }
1364
1365 if (Subtarget.hasVPOPCNTDQ()) {
1366 // VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512
1367 // version of popcntd/q.
1368 for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64,
1369 MVT::v4i32, MVT::v2i64})
1370 setOperationAction(ISD::CTPOP, VT, Legal);
1371 }
1372
1373 // Custom lower several nodes.
1374 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1375 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1376 setOperationAction(ISD::MGATHER, VT, Custom);
1377 setOperationAction(ISD::MSCATTER, VT, Custom);
1378 }
1379
1380 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v1i1, Legal);
1381
1382 // Extract subvector is special because the value type
1383 // (result) is 256-bit but the source is 512-bit wide.
1384 // 128-bit was made Legal under AVX1.
1385 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1386 MVT::v8f32, MVT::v4f64 })
1387 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1388 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
1389 MVT::v16i1, MVT::v32i1, MVT::v64i1 })
1390 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1391
1392 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1393 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1394 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1395 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1396 setOperationAction(ISD::VSELECT, VT, Custom);
1397 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1398 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1399 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1400 setOperationAction(ISD::MLOAD, VT, Legal);
1401 setOperationAction(ISD::MSTORE, VT, Legal);
1402 setOperationAction(ISD::MGATHER, VT, Legal);
1403 setOperationAction(ISD::MSCATTER, VT, Custom);
1404 }
1405 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1406 setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
1407 setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
1408 }
1409 }// has AVX-512
1410
1411 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1412 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1413 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1414
1415 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1416 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1417
1418 setOperationAction(ISD::ADD, MVT::v32i1, Custom);
1419 setOperationAction(ISD::ADD, MVT::v64i1, Custom);
1420 setOperationAction(ISD::SUB, MVT::v32i1, Custom);
1421 setOperationAction(ISD::SUB, MVT::v64i1, Custom);
1422 setOperationAction(ISD::MUL, MVT::v32i1, Custom);
1423 setOperationAction(ISD::MUL, MVT::v64i1, Custom);
1424
1425 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1426 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1427 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1428 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1429 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1430 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1431 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1432 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1433 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1434 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1435 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1436 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1437 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1438 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1439 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1440 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1441 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1442 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1443 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom);
1444 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i1, Custom);
1445 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1446 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1447 setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
1448 setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
1449 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1450 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1451 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1452 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1453 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1454 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1455 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1456 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1457 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1458 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
1459 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
1460 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1461 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1462 setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
1463 setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
1464 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1465 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
1466 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
1467 setOperationAction(ISD::BUILD_VECTOR, MVT::v32i1, Custom);
1468 setOperationAction(ISD::BUILD_VECTOR, MVT::v64i1, Custom);
1469 setOperationAction(ISD::VSELECT, MVT::v32i1, Expand);
1470 setOperationAction(ISD::VSELECT, MVT::v64i1, Expand);
1471 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1472
1473 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1474
1475 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1476 if (Subtarget.hasVLX()) {
1477 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1478 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1479 }
1480
1481 LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom;
1482 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1483 setOperationAction(ISD::MLOAD, VT, Action);
1484 setOperationAction(ISD::MSTORE, VT, Action);
1485 }
1486
1487 if (Subtarget.hasCDI()) {
1488 setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
1489 setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
1490 }
1491
1492 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1493 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1494 setOperationAction(ISD::VSELECT, VT, Custom);
1495 setOperationAction(ISD::ABS, VT, Legal);
1496 setOperationAction(ISD::SRL, VT, Custom);
1497 setOperationAction(ISD::SHL, VT, Custom);
1498 setOperationAction(ISD::SRA, VT, Custom);
1499 setOperationAction(ISD::MLOAD, VT, Legal);
1500 setOperationAction(ISD::MSTORE, VT, Legal);
1501 setOperationAction(ISD::CTPOP, VT, Custom);
1502 setOperationAction(ISD::CTTZ, VT, Custom);
1503 setOperationAction(ISD::SMAX, VT, Legal);
1504 setOperationAction(ISD::UMAX, VT, Legal);
1505 setOperationAction(ISD::SMIN, VT, Legal);
1506 setOperationAction(ISD::UMIN, VT, Legal);
1507
1508 setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
1509 setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
1510 setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
1511 }
1512
1513 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1514 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1515 }
1516 }
1517
1518 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1519 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1520 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1521
1522 for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
1523 setOperationAction(ISD::ADD, VT, Custom);
1524 setOperationAction(ISD::SUB, VT, Custom);
1525 setOperationAction(ISD::MUL, VT, Custom);
1526 setOperationAction(ISD::VSELECT, VT, Expand);
1527
1528 setOperationAction(ISD::TRUNCATE, VT, Custom);
1529 setOperationAction(ISD::SETCC, VT, Custom);
1530 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1531 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1532 setOperationAction(ISD::SELECT, VT, Custom);
1533 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1534 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1535 }
1536
1537 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1538 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1539 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1540 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1541
1542 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1543 setOperationAction(ISD::SMAX, VT, Legal);
1544 setOperationAction(ISD::UMAX, VT, Legal);
1545 setOperationAction(ISD::SMIN, VT, Legal);
1546 setOperationAction(ISD::UMIN, VT, Legal);
1547 }
1548 }
1549
1550 // We want to custom lower some of our intrinsics.
1551 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1552 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1553 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1554 if (!Subtarget.is64Bit()) {
1555 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1556 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1557 }
1558
1559 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1560 // handle type legalization for these operations here.
1561 //
1562 // FIXME: We really should do custom legalization for addition and
1563 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1564 // than generic legalization for 64-bit multiplication-with-overflow, though.
1565 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1566 if (VT == MVT::i64 && !Subtarget.is64Bit())
1567 continue;
1568 // Add/Sub/Mul with overflow operations are custom lowered.
1569 setOperationAction(ISD::SADDO, VT, Custom);
1570 setOperationAction(ISD::UADDO, VT, Custom);
1571 setOperationAction(ISD::SSUBO, VT, Custom);
1572 setOperationAction(ISD::USUBO, VT, Custom);
1573 setOperationAction(ISD::SMULO, VT, Custom);
1574 setOperationAction(ISD::UMULO, VT, Custom);
1575
1576 // Support carry in as value rather than glue.
1577 setOperationAction(ISD::ADDCARRY, VT, Custom);
1578 setOperationAction(ISD::SUBCARRY, VT, Custom);
1579 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1580 }
1581
1582 if (!Subtarget.is64Bit()) {
1583 // These libcalls are not available in 32-bit.
1584 setLibcallName(RTLIB::SHL_I128, nullptr);
1585 setLibcallName(RTLIB::SRL_I128, nullptr);
1586 setLibcallName(RTLIB::SRA_I128, nullptr);
1587 setLibcallName(RTLIB::MUL_I128, nullptr);
1588 }
1589
1590 // Combine sin / cos into one node or libcall if possible.
1591 if (Subtarget.hasSinCos()) {
1592 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1593 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1594 if (Subtarget.isTargetDarwin()) {
1595 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1596 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1597 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1598 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1599 }
1600 }
1601
1602 if (Subtarget.isTargetWin64()) {
1603 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1604 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1605 setOperationAction(ISD::SREM, MVT::i128, Custom);
1606 setOperationAction(ISD::UREM, MVT::i128, Custom);
1607 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1608 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1609 }
1610
1611 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1612 // is. We should promote the value to 64-bits to solve this.
1613 // This is what the CRT headers do - `fmodf` is an inline header
1614 // function casting to f64 and calling `fmod`.
1615 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1616 Subtarget.isTargetWindowsItanium()))
1617 for (ISD::NodeType Op :
1618 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1619 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1620 if (isOperationExpand(Op, MVT::f32))
1621 setOperationAction(Op, MVT::f32, Promote);
1622
1623 // We have target-specific dag combine patterns for the following nodes:
1624 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1625 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1626 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1627 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1628 setTargetDAGCombine(ISD::BITCAST);
1629 setTargetDAGCombine(ISD::VSELECT);
1630 setTargetDAGCombine(ISD::SELECT);
1631 setTargetDAGCombine(ISD::SHL);
1632 setTargetDAGCombine(ISD::SRA);
1633 setTargetDAGCombine(ISD::SRL);
1634 setTargetDAGCombine(ISD::OR);
1635 setTargetDAGCombine(ISD::AND);
1636 setTargetDAGCombine(ISD::ADD);
1637 setTargetDAGCombine(ISD::FADD);
1638 setTargetDAGCombine(ISD::FSUB);
1639 setTargetDAGCombine(ISD::FNEG);
1640 setTargetDAGCombine(ISD::FMA);
1641 setTargetDAGCombine(ISD::FMINNUM);
1642 setTargetDAGCombine(ISD::FMAXNUM);
1643 setTargetDAGCombine(ISD::SUB);
1644 setTargetDAGCombine(ISD::LOAD);
1645 setTargetDAGCombine(ISD::MLOAD);
1646 setTargetDAGCombine(ISD::STORE);
1647 setTargetDAGCombine(ISD::MSTORE);
1648 setTargetDAGCombine(ISD::TRUNCATE);
1649 setTargetDAGCombine(ISD::ZERO_EXTEND);
1650 setTargetDAGCombine(ISD::ANY_EXTEND);
1651 setTargetDAGCombine(ISD::SIGN_EXTEND);
1652 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1653 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1654 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1655 setTargetDAGCombine(ISD::SINT_TO_FP);
1656 setTargetDAGCombine(ISD::UINT_TO_FP);
1657 setTargetDAGCombine(ISD::SETCC);
1658 setTargetDAGCombine(ISD::MUL);
1659 setTargetDAGCombine(ISD::XOR);
1660 setTargetDAGCombine(ISD::MSCATTER);
1661 setTargetDAGCombine(ISD::MGATHER);
1662
1663 computeRegisterProperties(Subtarget.getRegisterInfo());
1664
1665 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1666 MaxStoresPerMemsetOptSize = 8;
1667 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1668 MaxStoresPerMemcpyOptSize = 4;
1669 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1670 MaxStoresPerMemmoveOptSize = 4;
1671
1672 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1673 // that needs to be benchmarked and balanced with the potential use of vector
1674 // load/store types (PR33329, PR33914).
1675 MaxLoadsPerMemcmp = 2;
1676 MaxLoadsPerMemcmpOptSize = 2;
1677
1678 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1679 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1680
1681 // An out-of-order CPU can speculatively execute past a predictable branch,
1682 // but a conditional move could be stalled by an expensive earlier operation.
1683 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1684 EnableExtLdPromotion = true;
1685 setPrefFunctionAlignment(4); // 2^4 bytes.
1686
1687 verifyIntrinsicTables();
1688}
1689
1690// This has so far only been implemented for 64-bit MachO.
1691bool X86TargetLowering::useLoadStackGuardNode() const {
1692 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1693}
1694
1695TargetLoweringBase::LegalizeTypeAction
1696X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1697 if (ExperimentalVectorWideningLegalization &&
1698 VT.getVectorNumElements() != 1 &&
1699 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1700 return TypeWidenVector;
1701
1702 return TargetLoweringBase::getPreferredVectorAction(VT);
1703}
1704
1705EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1706 LLVMContext& Context,
1707 EVT VT) const {
1708 if (!VT.isVector())
1709 return MVT::i8;
1710
1711 if (VT.isSimple()) {
1712 MVT VVT = VT.getSimpleVT();
1713 const unsigned NumElts = VVT.getVectorNumElements();
1714 MVT EltVT = VVT.getVectorElementType();
1715 if (VVT.is512BitVector()) {
1716 if (Subtarget.hasAVX512())
1717 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1718 EltVT == MVT::f32 || EltVT == MVT::f64)
1719 switch(NumElts) {
1720 case 8: return MVT::v8i1;
1721 case 16: return MVT::v16i1;
1722 }
1723 if (Subtarget.hasBWI())
1724 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1725 switch(NumElts) {
1726 case 32: return MVT::v32i1;
1727 case 64: return MVT::v64i1;
1728 }
1729 }
1730
1731 if (Subtarget.hasBWI() && Subtarget.hasVLX())
1732 return MVT::getVectorVT(MVT::i1, NumElts);
1733
1734 if (!isTypeLegal(VT) && getTypeAction(Context, VT) == TypePromoteInteger) {
1735 EVT LegalVT = getTypeToTransformTo(Context, VT);
1736 EltVT = LegalVT.getVectorElementType().getSimpleVT();
1737 }
1738
1739 if (Subtarget.hasVLX() && EltVT.getSizeInBits() >= 32)
1740 switch(NumElts) {
1741 case 2: return MVT::v2i1;
1742 case 4: return MVT::v4i1;
1743 case 8: return MVT::v8i1;
1744 }
1745 }
1746
1747 return VT.changeVectorElementTypeToInteger();
1748}
1749
1750/// Helper for getByValTypeAlignment to determine
1751/// the desired ByVal argument alignment.
1752static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1753 if (MaxAlign == 16)
1754 return;
1755 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1756 if (VTy->getBitWidth() == 128)
1757 MaxAlign = 16;
1758 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1759 unsigned EltAlign = 0;
1760 getMaxByValAlign(ATy->getElementType(), EltAlign);
1761 if (EltAlign > MaxAlign)
1762 MaxAlign = EltAlign;
1763 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1764 for (auto *EltTy : STy->elements()) {
1765 unsigned EltAlign = 0;
1766 getMaxByValAlign(EltTy, EltAlign);
1767 if (EltAlign > MaxAlign)
1768 MaxAlign = EltAlign;
1769 if (MaxAlign == 16)
1770 break;
1771 }
1772 }
1773}
1774
1775/// Return the desired alignment for ByVal aggregate
1776/// function arguments in the caller parameter area. For X86, aggregates
1777/// that contain SSE vectors are placed at 16-byte boundaries while the rest
1778/// are at 4-byte boundaries.
1779unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1780 const DataLayout &DL) const {
1781 if (Subtarget.is64Bit()) {
1782 // Max of 8 and alignment of type.
1783 unsigned TyAlign = DL.getABITypeAlignment(Ty);
1784 if (TyAlign > 8)
1785 return TyAlign;
1786 return 8;
1787 }
1788
1789 unsigned Align = 4;
1790 if (Subtarget.hasSSE1())
1791 getMaxByValAlign(Ty, Align);
1792 return Align;
1793}
1794
1795/// Returns the target specific optimal type for load
1796/// and store operations as a result of memset, memcpy, and memmove
1797/// lowering. If DstAlign is zero that means it's safe to destination
1798/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1799/// means there isn't a need to check it against alignment requirement,
1800/// probably because the source does not need to be loaded. If 'IsMemset' is
1801/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1802/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1803/// source is constant so it does not need to be loaded.
1804/// It returns EVT::Other if the type should be determined using generic
1805/// target-independent logic.
1806EVT
1807X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1808 unsigned DstAlign, unsigned SrcAlign,
1809 bool IsMemset, bool ZeroMemset,
1810 bool MemcpyStrSrc,
1811 MachineFunction &MF) const {
1812 const Function *F = MF.getFunction();
1813 if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
1814 if (Size >= 16 &&
1815 (!Subtarget.isUnalignedMem16Slow() ||
1816 ((DstAlign == 0 || DstAlign >= 16) &&
1817 (SrcAlign == 0 || SrcAlign >= 16)))) {
1818 // FIXME: Check if unaligned 32-byte accesses are slow.
1819 if (Size >= 32 && Subtarget.hasAVX()) {
1820 // Although this isn't a well-supported type for AVX1, we'll let
1821 // legalization and shuffle lowering produce the optimal codegen. If we
1822 // choose an optimal type with a vector element larger than a byte,
1823 // getMemsetStores() may create an intermediate splat (using an integer
1824 // multiply) before we splat as a vector.
1825 return MVT::v32i8;
1826 }
1827 if (Subtarget.hasSSE2())
1828 return MVT::v16i8;
1829 // TODO: Can SSE1 handle a byte vector?
1830 if (Subtarget.hasSSE1())
1831 return MVT::v4f32;
1832 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
1833 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
1834 // Do not use f64 to lower memcpy if source is string constant. It's
1835 // better to use i32 to avoid the loads.
1836 // Also, do not use f64 to lower memset unless this is a memset of zeros.
1837 // The gymnastics of splatting a byte value into an XMM register and then
1838 // only using 8-byte stores (because this is a CPU with slow unaligned
1839 // 16-byte accesses) makes that a loser.
1840 return MVT::f64;
1841 }
1842 }
1843 // This is a compromise. If we reach here, unaligned accesses may be slow on
1844 // this target. However, creating smaller, aligned accesses could be even
1845 // slower and would certainly be a lot more code.
1846 if (Subtarget.is64Bit() && Size >= 8)
1847 return MVT::i64;
1848 return MVT::i32;
1849}
1850
1851bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1852 if (VT == MVT::f32)
1853 return X86ScalarSSEf32;
1854 else if (VT == MVT::f64)
1855 return X86ScalarSSEf64;
1856 return true;
1857}
1858
1859bool
1860X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1861 unsigned,
1862 unsigned,
1863 bool *Fast) const {
1864 if (Fast) {
1865 switch (VT.getSizeInBits()) {
1866 default:
1867 // 8-byte and under are always assumed to be fast.
1868 *Fast = true;
1869 break;
1870 case 128:
1871 *Fast = !Subtarget.isUnalignedMem16Slow();
1872 break;
1873 case 256:
1874 *Fast = !Subtarget.isUnalignedMem32Slow();
1875 break;
1876 // TODO: What about AVX-512 (512-bit) accesses?
1877 }
1878 }
1879 // Misaligned accesses of any size are always allowed.
1880 return true;
1881}
1882
1883/// Return the entry encoding for a jump table in the
1884/// current function. The returned value is a member of the
1885/// MachineJumpTableInfo::JTEntryKind enum.
1886unsigned X86TargetLowering::getJumpTableEncoding() const {
1887 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1888 // symbol.
1889 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
1890 return MachineJumpTableInfo::EK_Custom32;
1891
1892 // Otherwise, use the normal jump table encoding heuristics.
1893 return TargetLowering::getJumpTableEncoding();
1894}
1895
1896bool X86TargetLowering::useSoftFloat() const {
1897 return Subtarget.useSoftFloat();
1898}
1899
1900void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
1901 ArgListTy &Args) const {
1902
1903 // Only relabel X86-32 for C / Stdcall CCs.
1904 if (Subtarget.is64Bit())
1905 return;
1906 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
1907 return;
1908 unsigned ParamRegs = 0;
1909 if (auto *M = MF->getFunction()->getParent())
1910 ParamRegs = M->getNumberRegisterParameters();
1911
1912 // Mark the first N int arguments as having reg
1913 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
1914 Type *T = Args[Idx].Ty;
1915 if (T->isPointerTy() || T->isIntegerTy())
1916 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
1917 unsigned numRegs = 1;
1918 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
1919 numRegs = 2;
1920 if (ParamRegs < numRegs)
1921 return;
1922 ParamRegs -= numRegs;
1923 Args[Idx].IsInReg = true;
1924 }
1925 }
1926}
1927
1928const MCExpr *
1929X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1930 const MachineBasicBlock *MBB,
1931 unsigned uid,MCContext &Ctx) const{
1932 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 1932, __PRETTY_FUNCTION__))
;
1933 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
1934 // entries.
1935 return MCSymbolRefExpr::create(MBB->getSymbol(),
1936 MCSymbolRefExpr::VK_GOTOFF, Ctx);
1937}
1938
1939/// Returns relocation base for the given PIC jumptable.
1940SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
1941 SelectionDAG &DAG) const {
1942 if (!Subtarget.is64Bit())
1943 // This doesn't have SDLoc associated with it, but is not really the
1944 // same as a Register.
1945 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
1946 getPointerTy(DAG.getDataLayout()));
1947 return Table;
1948}
1949
1950/// This returns the relocation base for the given PIC jumptable,
1951/// the same as getPICJumpTableRelocBase, but as an MCExpr.
1952const MCExpr *X86TargetLowering::
1953getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
1954 MCContext &Ctx) const {
1955 // X86-64 uses RIP relative addressing based on the jump table label.
1956 if (Subtarget.isPICStyleRIPRel())
1957 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
1958
1959 // Otherwise, the reference is relative to the PIC base.
1960 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
1961}
1962
1963std::pair<const TargetRegisterClass *, uint8_t>
1964X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1965 MVT VT) const {
1966 const TargetRegisterClass *RRC = nullptr;
1967 uint8_t Cost = 1;
1968 switch (VT.SimpleTy) {
1969 default:
1970 return TargetLowering::findRepresentativeClass(TRI, VT);
1971 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
1972 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
1973 break;
1974 case MVT::x86mmx:
1975 RRC = &X86::VR64RegClass;
1976 break;
1977 case MVT::f32: case MVT::f64:
1978 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1979 case MVT::v4f32: case MVT::v2f64:
1980 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
1981 case MVT::v8f32: case MVT::v4f64:
1982 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
1983 case MVT::v16f32: case MVT::v8f64:
1984 RRC = &X86::VR128XRegClass;
1985 break;
1986 }
1987 return std::make_pair(RRC, Cost);
1988}
1989
1990unsigned X86TargetLowering::getAddressSpace() const {
1991 if (Subtarget.is64Bit())
1992 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
1993 return 256;
1994}
1995
1996static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
1997 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
1998 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
1999}
2000
2001static Constant* SegmentOffset(IRBuilder<> &IRB,
2002 unsigned Offset, unsigned AddressSpace) {
2003 return ConstantExpr::getIntToPtr(
2004 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2005 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2006}
2007
2008Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2009 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2010 // tcbhead_t; use it instead of the usual global variable (see
2011 // sysdeps/{i386,x86_64}/nptl/tls.h)
2012 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2013 if (Subtarget.isTargetFuchsia()) {
2014 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2015 return SegmentOffset(IRB, 0x10, getAddressSpace());
2016 } else {
2017 // %fs:0x28, unless we're using a Kernel code model, in which case
2018 // it's %gs:0x28. gs:0x14 on i386.
2019 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2020 return SegmentOffset(IRB, Offset, getAddressSpace());
2021 }
2022 }
2023
2024 return TargetLowering::getIRStackGuard(IRB);
2025}
2026
2027void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2028 // MSVC CRT provides functionalities for stack protection.
2029 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
2030 // MSVC CRT has a global variable holding security cookie.
2031 M.getOrInsertGlobal("__security_cookie",
2032 Type::getInt8PtrTy(M.getContext()));
2033
2034 // MSVC CRT has a function to validate security cookie.
2035 auto *SecurityCheckCookie = cast<Function>(
2036 M.getOrInsertFunction("__security_check_cookie",
2037 Type::getVoidTy(M.getContext()),
2038 Type::getInt8PtrTy(M.getContext())));
2039 SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
2040 SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
2041 return;
2042 }
2043 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2044 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2045 return;
2046 TargetLowering::insertSSPDeclarations(M);
2047}
2048
2049Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2050 // MSVC CRT has a global variable holding security cookie.
2051 if (Subtarget.getTargetTriple().isOSMSVCRT())
2052 return M.getGlobalVariable("__security_cookie");
2053 return TargetLowering::getSDagStackGuard(M);
2054}
2055
2056Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2057 // MSVC CRT has a function to validate security cookie.
2058 if (Subtarget.getTargetTriple().isOSMSVCRT())
2059 return M.getFunction("__security_check_cookie");
2060 return TargetLowering::getSSPStackGuardCheck(M);
2061}
2062
2063Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2064 if (Subtarget.getTargetTriple().isOSContiki())
2065 return getDefaultSafeStackPointerLocation(IRB, false);
2066
2067 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2068 // definition of TLS_SLOT_SAFESTACK in
2069 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2070 if (Subtarget.isTargetAndroid()) {
2071 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2072 // %gs:0x24 on i386
2073 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2074 return SegmentOffset(IRB, Offset, getAddressSpace());
2075 }
2076
2077 // Fuchsia is similar.
2078 if (Subtarget.isTargetFuchsia()) {
2079 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2080 return SegmentOffset(IRB, 0x18, getAddressSpace());
2081 }
2082
2083 return TargetLowering::getSafeStackPointerLocation(IRB);
2084}
2085
2086bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2087 unsigned DestAS) const {
2088 assert(SrcAS != DestAS && "Expected different address spaces!")((SrcAS != DestAS && "Expected different address spaces!"
) ? static_cast<void> (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2088, __PRETTY_FUNCTION__))
;
2089
2090 return SrcAS < 256 && DestAS < 256;
2091}
2092
2093//===----------------------------------------------------------------------===//
2094// Return Value Calling Convention Implementation
2095//===----------------------------------------------------------------------===//
2096
2097#include "X86GenCallingConv.inc"
2098
2099bool X86TargetLowering::CanLowerReturn(
2100 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2101 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2102 SmallVector<CCValAssign, 16> RVLocs;
2103 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2104 return CCInfo.CheckReturn(Outs, RetCC_X86);
2105}
2106
2107const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2108 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2109 return ScratchRegs;
2110}
2111
2112/// Lowers masks values (v*i1) to the local register values
2113/// \returns DAG node after lowering to register type
2114static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2115 const SDLoc &Dl, SelectionDAG &DAG) {
2116 EVT ValVT = ValArg.getValueType();
2117
2118 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2119 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2120 // Two stage lowering might be required
2121 // bitcast: v8i1 -> i8 / v16i1 -> i16
2122 // anyextend: i8 -> i32 / i16 -> i32
2123 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2124 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2125 if (ValLoc == MVT::i32)
2126 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2127 return ValToCopy;
2128 } else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2129 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2130 // One stage lowering is required
2131 // bitcast: v32i1 -> i32 / v64i1 -> i64
2132 return DAG.getBitcast(ValLoc, ValArg);
2133 } else
2134 return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
2135}
2136
2137/// Breaks v64i1 value into two registers and adds the new node to the DAG
2138static void Passv64i1ArgInRegs(
2139 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2140 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2141 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2142 assert((Subtarget.hasBWI() || Subtarget.hasBMI()) &&(((Subtarget.hasBWI() || Subtarget.hasBMI()) && "Expected AVX512BW or AVX512BMI target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI() || Subtarget.hasBMI()) && \"Expected AVX512BW or AVX512BMI target!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2143, __PRETTY_FUNCTION__))
2143 "Expected AVX512BW or AVX512BMI target!")(((Subtarget.hasBWI() || Subtarget.hasBMI()) && "Expected AVX512BW or AVX512BMI target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI() || Subtarget.hasBMI()) && \"Expected AVX512BW or AVX512BMI target!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2143, __PRETTY_FUNCTION__))
;
2144 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2144, __PRETTY_FUNCTION__))
;
2145 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2145, __PRETTY_FUNCTION__))
;
2146 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2147, __PRETTY_FUNCTION__))
2147 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2147, __PRETTY_FUNCTION__))
;
2148
2149 // Before splitting the value we cast it to i64
2150 Arg = DAG.getBitcast(MVT::i64, Arg);
2151
2152 // Splitting the value into two i32 types
2153 SDValue Lo, Hi;
2154 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2155 DAG.getConstant(0, Dl, MVT::i32));
2156 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2157 DAG.getConstant(1, Dl, MVT::i32));
2158
2159 // Attach the two i32 types into corresponding registers
2160 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2161 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2162}
2163
2164SDValue
2165X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2166 bool isVarArg,
2167 const SmallVectorImpl<ISD::OutputArg> &Outs,
2168 const SmallVectorImpl<SDValue> &OutVals,
2169 const SDLoc &dl, SelectionDAG &DAG) const {
2170 MachineFunction &MF = DAG.getMachineFunction();
2171 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2172
2173 // In some cases we need to disable registers from the default CSR list.
2174 // For example, when they are used for argument passing.
2175 bool ShouldDisableCalleeSavedRegister =
2176 CallConv == CallingConv::X86_RegCall ||
2177 MF.getFunction()->hasFnAttribute("no_caller_saved_registers");
2178
2179 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2180 report_fatal_error("X86 interrupts may not return any value");
2181
2182 SmallVector<CCValAssign, 16> RVLocs;
2183 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2184 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2185
2186 SDValue Flag;
2187 SmallVector<SDValue, 6> RetOps;
2188 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2189 // Operand #1 = Bytes To Pop
2190 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2191 MVT::i32));
2192
2193 // Copy the result values into the output registers.
2194 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2195 ++I, ++OutsIndex) {
2196 CCValAssign &VA = RVLocs[I];
2197 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2197, __PRETTY_FUNCTION__))
;
2198
2199 // Add the register to the CalleeSaveDisableRegs list.
2200 if (ShouldDisableCalleeSavedRegister)
2201 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2202
2203 SDValue ValToCopy = OutVals[OutsIndex];
2204 EVT ValVT = ValToCopy.getValueType();
2205
2206 // Promote values to the appropriate types.
2207 if (VA.getLocInfo() == CCValAssign::SExt)
2208 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2209 else if (VA.getLocInfo() == CCValAssign::ZExt)
2210 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2211 else if (VA.getLocInfo() == CCValAssign::AExt) {
2212 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2213 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2214 else
2215 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2216 }
2217 else if (VA.getLocInfo() == CCValAssign::BCvt)
2218 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2219
2220 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2221, __PRETTY_FUNCTION__))
2221 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2221, __PRETTY_FUNCTION__))
;
2222
2223 // If this is x86-64, and we disabled SSE, we can't return FP values,
2224 // or SSE or MMX vectors.
2225 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2226 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2227 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2228 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2229 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2230 } else if (ValVT == MVT::f64 &&
2231 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2232 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2233 // llvm-gcc has never done it right and no one has noticed, so this
2234 // should be OK for now.
2235 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2236 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2237 }
2238
2239 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2240 // the RET instruction and handled by the FP Stackifier.
2241 if (VA.getLocReg() == X86::FP0 ||
2242 VA.getLocReg() == X86::FP1) {
2243 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2244 // change the value to the FP stack register class.
2245 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2246 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2247 RetOps.push_back(ValToCopy);
2248 // Don't emit a copytoreg.
2249 continue;
2250 }
2251
2252 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2253 // which is returned in RAX / RDX.
2254 if (Subtarget.is64Bit()) {
2255 if (ValVT == MVT::x86mmx) {
2256 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2257 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2258 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2259 ValToCopy);
2260 // If we don't have SSE2 available, convert to v4f32 so the generated
2261 // register is legal.
2262 if (!Subtarget.hasSSE2())
2263 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2264 }
2265 }
2266 }
2267
2268 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2269
2270 if (VA.needsCustom()) {
2271 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2272, __PRETTY_FUNCTION__))
2272 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2272, __PRETTY_FUNCTION__))
;
2273
2274 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2275 Subtarget);
2276
2277 assert(2 == RegsToPass.size() &&((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2278, __PRETTY_FUNCTION__))
2278 "Expecting two registers after Pass64BitArgInRegs")((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2278, __PRETTY_FUNCTION__))
;
2279
2280 // Add the second register to the CalleeSaveDisableRegs list.
2281 if (ShouldDisableCalleeSavedRegister)
2282 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2283 } else {
2284 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2285 }
2286
2287 // Add nodes to the DAG and add the values into the RetOps list
2288 for (auto &Reg : RegsToPass) {
2289 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2290 Flag = Chain.getValue(1);
2291 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2292 }
2293 }
2294
2295 // Swift calling convention does not require we copy the sret argument
2296 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2297
2298 // All x86 ABIs require that for returning structs by value we copy
2299 // the sret argument into %rax/%eax (depending on ABI) for the return.
2300 // We saved the argument into a virtual register in the entry block,
2301 // so now we copy the value out and into %rax/%eax.
2302 //
2303 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2304 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2305 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2306 // either case FuncInfo->setSRetReturnReg() will have been called.
2307 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2308 // When we have both sret and another return value, we should use the
2309 // original Chain stored in RetOps[0], instead of the current Chain updated
2310 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2311
2312 // For the case of sret and another return value, we have
2313 // Chain_0 at the function entry
2314 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2315 // If we use Chain_1 in getCopyFromReg, we will have
2316 // Val = getCopyFromReg(Chain_1)
2317 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2318
2319 // getCopyToReg(Chain_0) will be glued together with
2320 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2321 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2322 // Data dependency from Unit B to Unit A due to usage of Val in
2323 // getCopyToReg(Chain_1, Val)
2324 // Chain dependency from Unit A to Unit B
2325
2326 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2327 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2328 getPointerTy(MF.getDataLayout()));
2329
2330 unsigned RetValReg
2331 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2332 X86::RAX : X86::EAX;
2333 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2334 Flag = Chain.getValue(1);
2335
2336 // RAX/EAX now acts like a return value.
2337 RetOps.push_back(
2338 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2339
2340 // Add the returned register to the CalleeSaveDisableRegs list.
2341 if (ShouldDisableCalleeSavedRegister)
2342 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2343 }
2344
2345 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2346 const MCPhysReg *I =
2347 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2348 if (I) {
2349 for (; *I; ++I) {
2350 if (X86::GR64RegClass.contains(*I))
2351 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2352 else
2353 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2353)
;
2354 }
2355 }
2356
2357 RetOps[0] = Chain; // Update chain.
2358
2359 // Add the flag if we have it.
2360 if (Flag.getNode())
2361 RetOps.push_back(Flag);
2362
2363 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2364 if (CallConv == CallingConv::X86_INTR)
2365 opcode = X86ISD::IRET;
2366 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2367}
2368
2369bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2370 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2371 return false;
2372
2373 SDValue TCChain = Chain;
2374 SDNode *Copy = *N->use_begin();
2375 if (Copy->getOpcode() == ISD::CopyToReg) {
2376 // If the copy has a glue operand, we conservatively assume it isn't safe to
2377 // perform a tail call.
2378 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2379 return false;
2380 TCChain = Copy->getOperand(0);
2381 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2382 return false;
2383
2384 bool HasRet = false;
2385 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2386 UI != UE; ++UI) {
2387 if (UI->getOpcode() != X86ISD::RET_FLAG)
2388 return false;
2389 // If we are returning more than one value, we can definitely
2390 // not make a tail call see PR19530
2391 if (UI->getNumOperands() > 4)
2392 return false;
2393 if (UI->getNumOperands() == 4 &&
2394 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2395 return false;
2396 HasRet = true;
2397 }
2398
2399 if (!HasRet)
2400 return false;
2401
2402 Chain = TCChain;
2403 return true;
2404}
2405
2406EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2407 ISD::NodeType ExtendKind) const {
2408 MVT ReturnMVT = MVT::i32;
2409
2410 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2411 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2412 // The ABI does not require i1, i8 or i16 to be extended.
2413 //
2414 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2415 // always extending i8/i16 return values, so keep doing that for now.
2416 // (PR26665).
2417 ReturnMVT = MVT::i8;
2418 }
2419
2420 EVT MinVT = getRegisterType(Context, ReturnMVT);
2421 return VT.bitsLT(MinVT) ? MinVT : VT;
2422}
2423
2424/// Reads two 32 bit registers and creates a 64 bit mask value.
2425/// \param VA The current 32 bit value that need to be assigned.
2426/// \param NextVA The next 32 bit value that need to be assigned.
2427/// \param Root The parent DAG node.
2428/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2429/// glue purposes. In the case the DAG is already using
2430/// physical register instead of virtual, we should glue
2431/// our new SDValue to InFlag SDvalue.
2432/// \return a new SDvalue of size 64bit.
2433static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2434 SDValue &Root, SelectionDAG &DAG,
2435 const SDLoc &Dl, const X86Subtarget &Subtarget,
2436 SDValue *InFlag = nullptr) {
2437 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2437, __PRETTY_FUNCTION__))
;
2438 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2438, __PRETTY_FUNCTION__))
;
2439 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2440, __PRETTY_FUNCTION__))
2440 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2440, __PRETTY_FUNCTION__))
;
2441 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2442, __PRETTY_FUNCTION__))
2442 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2442, __PRETTY_FUNCTION__))
;
2443 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2444, __PRETTY_FUNCTION__))
2444 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2444, __PRETTY_FUNCTION__))
;
2445
2446 SDValue Lo, Hi;
2447 unsigned Reg;
2448 SDValue ArgValueLo, ArgValueHi;
2449
2450 MachineFunction &MF = DAG.getMachineFunction();
2451 const TargetRegisterClass *RC = &X86::GR32RegClass;
2452
2453 // Read a 32 bit value from the registers
2454 if (nullptr == InFlag) {
2455 // When no physical register is present,
2456 // create an intermediate virtual register
2457 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2458 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2459 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2460 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2461 } else {
2462 // When a physical register is available read the value from it and glue
2463 // the reads together.
2464 ArgValueLo =
2465 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2466 *InFlag = ArgValueLo.getValue(2);
2467 ArgValueHi =
2468 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2469 *InFlag = ArgValueHi.getValue(2);
2470 }
2471
2472 // Convert the i32 type into v32i1 type
2473 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2474
2475 // Convert the i32 type into v32i1 type
2476 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2477
2478 // Concatenate the two values together
2479 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2480}
2481
2482/// The function will lower a register of various sizes (8/16/32/64)
2483/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2484/// \returns a DAG node contains the operand after lowering to mask type.
2485static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2486 const EVT &ValLoc, const SDLoc &Dl,
2487 SelectionDAG &DAG) {
2488 SDValue ValReturned = ValArg;
2489
2490 if (ValVT == MVT::v1i1)
2491 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2492
2493 if (ValVT == MVT::v64i1) {
2494 // In 32 bit machine, this case is handled by getv64i1Argument
2495 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2495, __PRETTY_FUNCTION__))
;
2496 // In 64 bit machine, There is no need to truncate the value only bitcast
2497 } else {
2498 MVT maskLen;
2499 switch (ValVT.getSimpleVT().SimpleTy) {
2500 case MVT::v8i1:
2501 maskLen = MVT::i8;
2502 break;
2503 case MVT::v16i1:
2504 maskLen = MVT::i16;
2505 break;
2506 case MVT::v32i1:
2507 maskLen = MVT::i32;
2508 break;
2509 default:
2510 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2510)
;
2511 }
2512
2513 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2514 }
2515 return DAG.getBitcast(ValVT, ValReturned);
2516}
2517
2518/// Lower the result values of a call into the
2519/// appropriate copies out of appropriate physical registers.
2520///
2521SDValue X86TargetLowering::LowerCallResult(
2522 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2523 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2524 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2525 uint32_t *RegMask) const {
2526
2527 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2528 // Assign locations to each value returned by this call.
2529 SmallVector<CCValAssign, 16> RVLocs;
2530 bool Is64Bit = Subtarget.is64Bit();
2531 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2532 *DAG.getContext());
2533 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2534
2535 // Copy all of the result registers out of their specified physreg.
2536 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2537 ++I, ++InsIndex) {
2538 CCValAssign &VA = RVLocs[I];
2539 EVT CopyVT = VA.getLocVT();
2540
2541 // In some calling conventions we need to remove the used registers
2542 // from the register mask.
2543 if (RegMask) {
2544 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2545 SubRegs.isValid(); ++SubRegs)
2546 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2547 }
2548
2549 // If this is x86-64, and we disabled SSE, we can't return FP values
2550 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2551 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2552 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2553 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2554 }
2555
2556 // If we prefer to use the value in xmm registers, copy it out as f80 and
2557 // use a truncate to move it from fp stack reg to xmm reg.
2558 bool RoundAfterCopy = false;
2559 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2560 isScalarFPTypeInSSEReg(VA.getValVT())) {
2561 if (!Subtarget.hasX87())
2562 report_fatal_error("X87 register return with X87 disabled");
2563 CopyVT = MVT::f80;
2564 RoundAfterCopy = (CopyVT != VA.getLocVT());
2565 }
2566
2567 SDValue Val;
2568 if (VA.needsCustom()) {
2569 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2570, __PRETTY_FUNCTION__))
2570 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2570, __PRETTY_FUNCTION__))
;
2571 Val =
2572 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2573 } else {
2574 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2575 .getValue(1);
2576 Val = Chain.getValue(0);
2577 InFlag = Chain.getValue(2);
2578 }
2579
2580 if (RoundAfterCopy)
2581 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2582 // This truncation won't change the value.
2583 DAG.getIntPtrConstant(1, dl));
2584
2585 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2586 if (VA.getValVT().isVector() &&
2587 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2588 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2589 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2590 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2591 } else
2592 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2593 }
2594
2595 InVals.push_back(Val);
2596 }
2597
2598 return Chain;
2599}
2600
2601//===----------------------------------------------------------------------===//
2602// C & StdCall & Fast Calling Convention implementation
2603//===----------------------------------------------------------------------===//
2604// The StdCall calling convention is used by many Windows API routines.
2605// It differs from the C calling convention only slightly: the callee, not
2606// the caller, cleans up the stack. Symbols are also decorated in a
2607// convention-specific way. It doesn't support any vector arguments.
2608// For info on the fast calling convention, see the Fast Calling Convention
2609// (tail call) implementation, LowerX86_32FastCCCallTo.
2610
2611/// CallIsStructReturn - Determines whether a call uses struct return
2612/// semantics.
2613enum StructReturnType {
2614 NotStructReturn,
2615 RegStructReturn,
2616 StackStructReturn
2617};
2618static StructReturnType
2619callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
2620 if (Outs.empty())
2621 return NotStructReturn;
2622
2623 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2624 if (!Flags.isSRet())
2625 return NotStructReturn;
2626 if (Flags.isInReg() || IsMCU)
2627 return RegStructReturn;
2628 return StackStructReturn;
2629}
2630
2631/// Determines whether a function uses struct return semantics.
2632static StructReturnType
2633argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
2634 if (Ins.empty())
2635 return NotStructReturn;
2636
2637 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2638 if (!Flags.isSRet())
2639 return NotStructReturn;
2640 if (Flags.isInReg() || IsMCU)
2641 return RegStructReturn;
2642 return StackStructReturn;
2643}
2644
2645/// Make a copy of an aggregate at address specified by "Src" to address
2646/// "Dst" with size and alignment information specified by the specific
2647/// parameter attribute. The copy will be passed as a byval function parameter.
2648static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2649 SDValue Chain, ISD::ArgFlagsTy Flags,
2650 SelectionDAG &DAG, const SDLoc &dl) {
2651 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2652
2653 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2654 /*isVolatile*/false, /*AlwaysInline=*/true,
2655 /*isTailCall*/false,
2656 MachinePointerInfo(), MachinePointerInfo());
2657}
2658
2659/// Return true if the calling convention is one that we can guarantee TCO for.
2660static bool canGuaranteeTCO(CallingConv::ID CC) {
2661 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2662 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2663 CC == CallingConv::HHVM);
2664}
2665
2666/// Return true if we might ever do TCO for calls with this calling convention.
2667static bool mayTailCallThisCC(CallingConv::ID CC) {
2668 switch (CC) {
2669 // C calling conventions:
2670 case CallingConv::C:
2671 case CallingConv::Win64:
2672 case CallingConv::X86_64_SysV:
2673 // Callee pop conventions:
2674 case CallingConv::X86_ThisCall:
2675 case CallingConv::X86_StdCall:
2676 case CallingConv::X86_VectorCall:
2677 case CallingConv::X86_FastCall:
2678 return true;
2679 default:
2680 return canGuaranteeTCO(CC);
2681 }
2682}
2683
2684/// Return true if the function is being made into a tailcall target by
2685/// changing its ABI.
2686static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2687 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2688}
2689
2690bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2691 auto Attr =
2692 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2693 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2694 return false;
2695
2696 ImmutableCallSite CS(CI);
2697 CallingConv::ID CalleeCC = CS.getCallingConv();
2698 if (!mayTailCallThisCC(CalleeCC))
2699 return false;
2700
2701 return true;
2702}
2703
2704SDValue
2705X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2706 const SmallVectorImpl<ISD::InputArg> &Ins,
2707 const SDLoc &dl, SelectionDAG &DAG,
2708 const CCValAssign &VA,
2709 MachineFrameInfo &MFI, unsigned i) const {
2710 // Create the nodes corresponding to a load from this parameter slot.
2711 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2712 bool AlwaysUseMutable = shouldGuaranteeTCO(
2713 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2714 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2715 EVT ValVT;
2716 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2717
2718 // If value is passed by pointer we have address passed instead of the value
2719 // itself. No need to extend if the mask value and location share the same
2720 // absolute size.
2721 bool ExtendedInMem =
2722 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2723 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2724
2725 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2726 ValVT = VA.getLocVT();
2727 else
2728 ValVT = VA.getValVT();
2729
2730 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2731 // taken by a return address.
2732 int Offset = 0;
2733 if (CallConv == CallingConv::X86_INTR) {
2734 // X86 interrupts may take one or two arguments.
2735 // On the stack there will be no return address as in regular call.
2736 // Offset of last argument need to be set to -4/-8 bytes.
2737 // Where offset of the first argument out of two, should be set to 0 bytes.
2738 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2739 if (Subtarget.is64Bit() && Ins.size() == 2) {
2740 // The stack pointer needs to be realigned for 64 bit handlers with error
2741 // code, so the argument offset changes by 8 bytes.
2742 Offset += 8;
2743 }
2744 }
2745
2746 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2747 // changed with more analysis.
2748 // In case of tail call optimization mark all arguments mutable. Since they
2749 // could be overwritten by lowering of arguments in case of a tail call.
2750 if (Flags.isByVal()) {
2751 unsigned Bytes = Flags.getByValSize();
2752 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2753 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2754 // Adjust SP offset of interrupt parameter.
2755 if (CallConv == CallingConv::X86_INTR) {
2756 MFI.setObjectOffset(FI, Offset);
2757 }
2758 return DAG.getFrameIndex(FI, PtrVT);
2759 }
2760
2761 // This is an argument in memory. We might be able to perform copy elision.
2762 if (Flags.isCopyElisionCandidate()) {
2763 EVT ArgVT = Ins[i].ArgVT;
2764 SDValue PartAddr;
2765 if (Ins[i].PartOffset == 0) {
2766 // If this is a one-part value or the first part of a multi-part value,
2767 // create a stack object for the entire argument value type and return a
2768 // load from our portion of it. This assumes that if the first part of an
2769 // argument is in memory, the rest will also be in memory.
2770 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
2771 /*Immutable=*/false);
2772 PartAddr = DAG.getFrameIndex(FI, PtrVT);
2773 return DAG.getLoad(
2774 ValVT, dl, Chain, PartAddr,
2775 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2776 } else {
2777 // This is not the first piece of an argument in memory. See if there is
2778 // already a fixed stack object including this offset. If so, assume it
2779 // was created by the PartOffset == 0 branch above and create a load from
2780 // the appropriate offset into it.
2781 int64_t PartBegin = VA.getLocMemOffset();
2782 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
2783 int FI = MFI.getObjectIndexBegin();
2784 for (; MFI.isFixedObjectIndex(FI); ++FI) {
2785 int64_t ObjBegin = MFI.getObjectOffset(FI);
2786 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
2787 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
2788 break;
2789 }
2790 if (MFI.isFixedObjectIndex(FI)) {
2791 SDValue Addr =
2792 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
2793 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
2794 return DAG.getLoad(
2795 ValVT, dl, Chain, Addr,
2796 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
2797 Ins[i].PartOffset));
2798 }
2799 }
2800 }
2801
2802 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2803 VA.getLocMemOffset(), isImmutable);
2804
2805 // Set SExt or ZExt flag.
2806 if (VA.getLocInfo() == CCValAssign::ZExt) {
2807 MFI.setObjectZExt(FI, true);
2808 } else if (VA.getLocInfo() == CCValAssign::SExt) {
2809 MFI.setObjectSExt(FI, true);
2810 }
2811
2812 // Adjust SP offset of interrupt parameter.
2813 if (CallConv == CallingConv::X86_INTR) {
2814 MFI.setObjectOffset(FI, Offset);
2815 }
2816
2817 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2818 SDValue Val = DAG.getLoad(
2819 ValVT, dl, Chain, FIN,
2820 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2821 return ExtendedInMem
2822 ? (VA.getValVT().isVector()
2823 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
2824 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
2825 : Val;
2826}
2827
2828// FIXME: Get this from tablegen.
2829static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2830 const X86Subtarget &Subtarget) {
2831 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2831, __PRETTY_FUNCTION__))
;
2832
2833 if (Subtarget.isCallingConvWin64(CallConv)) {
2834 static const MCPhysReg GPR64ArgRegsWin64[] = {
2835 X86::RCX, X86::RDX, X86::R8, X86::R9
2836 };
2837 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2838 }
2839
2840 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2841 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2842 };
2843 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2844}
2845
2846// FIXME: Get this from tablegen.
2847static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2848 CallingConv::ID CallConv,
2849 const X86Subtarget &Subtarget) {
2850 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2850, __PRETTY_FUNCTION__))
;
2851 if (Subtarget.isCallingConvWin64(CallConv)) {
2852 // The XMM registers which might contain var arg parameters are shadowed
2853 // in their paired GPR. So we only need to save the GPR to their home
2854 // slots.
2855 // TODO: __vectorcall will change this.
2856 return None;
2857 }
2858
2859 const Function *Fn = MF.getFunction();
2860 bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
2861 bool isSoftFloat = Subtarget.useSoftFloat();
2862 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2863, __PRETTY_FUNCTION__))
2863 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2863, __PRETTY_FUNCTION__))
;
2864 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
2865 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2866 // registers.
2867 return None;
2868
2869 static const MCPhysReg XMMArgRegs64Bit[] = {
2870 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2871 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2872 };
2873 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2874}
2875
2876#ifndef NDEBUG
2877static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
2878 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
2879 [](const CCValAssign &A, const CCValAssign &B) -> bool {
2880 return A.getValNo() < B.getValNo();
2881 });
2882}
2883#endif
2884
/// Lowers the incoming (formal) arguments of the function being compiled.
///
/// Assigns a location to every entry of \p Ins using CC_X86, then for each
/// location builds either a copy from a live-in register or a stack load
/// (via LowerMemArgument) and pushes the resulting value onto \p InVals.
/// Afterwards it records sret, vararg, mustail-forwarding, callee-pop and
/// stack-size bookkeeping in X86MachineFunctionInfo.
///
/// \param Chain     incoming chain; the returned value is this chain,
///                  possibly replaced by TokenFactor / CopyToReg nodes.
/// \param CallConv  calling convention of the function.
/// \param isVarArg  whether the function is variadic.
/// \param Ins       descriptions of the incoming arguments.
/// \param dl        debug location used for all created nodes.
/// \param DAG       selection DAG in which nodes are created.
/// \param InVals    output list; one value is appended per loop iteration.
/// \returns the updated chain.
2885SDValue X86TargetLowering::LowerFormalArguments(
2886 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2887 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2888 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2889 MachineFunction &MF = DAG.getMachineFunction();
2890 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2891 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
2892
// An externally visible function named "main" on a Cygwin/MinGW target is
// forced to use a frame pointer.
2893 const Function *Fn = MF.getFunction();
2894 if (Fn->hasExternalLinkage() &&
2895 Subtarget.isTargetCygMing() &&
2896 Fn->getName() == "main")
2897 FuncInfo->setForceFramePointer(true);
2898
2899 MachineFrameInfo &MFI = MF.getFrameInfo();
2900 bool Is64Bit = Subtarget.is64Bit();
2901 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2902
2903 assert(((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2905, __PRETTY_FUNCTION__))
2904 !(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2905, __PRETTY_FUNCTION__))
2905 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2905, __PRETTY_FUNCTION__))
;
2906
// Interrupt handlers accept exactly one argument, or two when the second is
// an integer of the target width (i64 when Is64Bit, i32 otherwise); anything
// else is a fatal error.
2907 if (CallConv == CallingConv::X86_INTR) {
2908 bool isLegal = Ins.size() == 1 ||
2909 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
2910 (!Is64Bit && Ins[1].VT == MVT::i32)));
2911 if (!isLegal)
2912 report_fatal_error("X86 interrupts may take one or two arguments");
2913 }
2914
2915 // Assign locations to all of the incoming arguments.
2916 SmallVector<CCValAssign, 16> ArgLocs;
2917 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2918
2919 // Allocate shadow area for Win64.
2920 if (IsWin64)
2921 CCInfo.AllocateStack(32, 8);
2922
2923 CCInfo.AnalyzeArguments(Ins, CC_X86);
2924
2925 // In vectorcall calling convention a second pass is required for the HVA
2926 // types.
2927 if (CallingConv::X86_VectorCall == CallConv) {
2928 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
2929 }
2930
2931 // The next loop assumes that the locations are in the same order of the
2932 // input arguments.
2933 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2934, __PRETTY_FUNCTION__))
2934 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2934, __PRETTY_FUNCTION__))
;
2935
// I indexes ArgLocs while InsIndex indexes Ins. They advance together, except
// that the custom v64i1 case below consumes a second ArgLocs entry (++I)
// without advancing InsIndex, so the two indices can diverge.
2936 SDValue ArgValue;
2937 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
2938 ++I, ++InsIndex) {
2939 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2939, __PRETTY_FUNCTION__))
;
2940 CCValAssign &VA = ArgLocs[I];
2941
2942 if (VA.isRegLoc()) {
2943 EVT RegVT = VA.getLocVT();
2944 if (VA.needsCustom()) {
2945 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2947, __PRETTY_FUNCTION__))
2946 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2947, __PRETTY_FUNCTION__))
2947 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2947, __PRETTY_FUNCTION__))
;
2948
2949 // v64i1 values, in regcall calling convention, that are
2950 // compiled to 32 bit arch, are split up into two registers.
2951 ArgValue =
2952 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
2953 } else {
// Pick the register class matching the location type; any type not listed
// here hits llvm_unreachable.
2954 const TargetRegisterClass *RC;
2955 if (RegVT == MVT::i32)
2956 RC = &X86::GR32RegClass;
2957 else if (Is64Bit && RegVT == MVT::i64)
2958 RC = &X86::GR64RegClass;
2959 else if (RegVT == MVT::f32)
2960 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
2961 else if (RegVT == MVT::f64)
2962 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
2963 else if (RegVT == MVT::f80)
2964 RC = &X86::RFP80RegClass;
2965 else if (RegVT == MVT::f128)
2966 RC = &X86::FR128RegClass;
2967 else if (RegVT.is512BitVector())
2968 RC = &X86::VR512RegClass;
2969 else if (RegVT.is256BitVector())
2970 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
2971 else if (RegVT.is128BitVector())
2972 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
2973 else if (RegVT == MVT::x86mmx)
2974 RC = &X86::VR64RegClass;
2975 else if (RegVT == MVT::v1i1)
2976 RC = &X86::VK1RegClass;
2977 else if (RegVT == MVT::v8i1)
2978 RC = &X86::VK8RegClass;
2979 else if (RegVT == MVT::v16i1)
2980 RC = &X86::VK16RegClass;
2981 else if (RegVT == MVT::v32i1)
2982 RC = &X86::VK32RegClass;
2983 else if (RegVT == MVT::v64i1)
2984 RC = &X86::VK64RegClass;
2985 else
2986 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 2986)
;
2987
2988 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2989 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2990 }
2991
2992 // If this is an 8 or 16-bit value, it is really passed promoted to 32
2993 // bits. Insert an assert[sz]ext to capture this, then truncate to the
2994 // right size.
2995 if (VA.getLocInfo() == CCValAssign::SExt)
2996 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2997 DAG.getValueType(VA.getValVT()));
2998 else if (VA.getLocInfo() == CCValAssign::ZExt)
2999 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3000 DAG.getValueType(VA.getValVT()));
3001 else if (VA.getLocInfo() == CCValAssign::BCvt)
3002 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3003
3004 if (VA.isExtInLoc()) {
3005 // Handle MMX values passed in XMM regs.
3006 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3007 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3008 else if (VA.getValVT().isVector() &&
3009 VA.getValVT().getScalarType() == MVT::i1 &&
3010 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3011 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3012 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3013 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3014 } else
3015 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3016 }
3017 } else {
3018 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3018, __PRETTY_FUNCTION__))
;
3019 ArgValue =
3020 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3021 }
3022
3023 // If value is passed via pointer - do a load.
3024 if (VA.getLocInfo() == CCValAssign::Indirect)
3025 ArgValue =
3026 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3027
3028 InVals.push_back(ArgValue);
3029 }
3030
3031 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3032 // Swift calling convention does not require we copy the sret argument
3033 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3034 if (CallConv == CallingConv::Swift)
3035 continue;
3036
3037 // All x86 ABIs require that for returning structs by value we copy the
3038 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3039 // the argument into a virtual register so that we can access it from the
3040 // return points.
3041 if (Ins[I].Flags.isSRet()) {
3042 unsigned Reg = FuncInfo->getSRetReturnReg();
3043 if (!Reg) {
3044 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3045 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3046 FuncInfo->setSRetReturnReg(Reg);
3047 }
3048 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3049 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
// Only the first sret argument found is processed.
3050 break;
3051 }
3052 }
3053
3054 unsigned StackSize = CCInfo.getNextStackOffset();
3055 // Align stack specially for tail calls.
3056 if (shouldGuaranteeTCO(CallConv,
3057 MF.getTarget().Options.GuaranteedTailCallOpt))
3058 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3059
3060 // If the function takes variable number of arguments, make a frame index for
3061 // the start of the first vararg value... for expansion of llvm.va_start. We
3062 // can skip this if there are no va_start calls.
3063 if (MFI.hasVAStart() &&
3064 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3065 CallConv != CallingConv::X86_ThisCall))) {
3066 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3067 }
3068
3069 // Figure out if XMM registers are in use.
3070 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && Fn->hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3072, __PRETTY_FUNCTION__))
3071 Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && Fn->hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3072, __PRETTY_FUNCTION__))
3072 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && Fn->hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3072, __PRETTY_FUNCTION__))
;
3073
3074 // 64-bit calling conventions support varargs and register parameters, so we
3075 // have to do extra work to spill them in the prologue.
3076 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3077 // Find the first unallocated argument registers.
3078 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3079 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3080 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3081 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3082 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3083, __PRETTY_FUNCTION__))
3083 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3083, __PRETTY_FUNCTION__))
;
3084
3085 // Gather all the live in physical registers.
3086 SmallVector<SDValue, 6> LiveGPRs;
3087 SmallVector<SDValue, 8> LiveXMMRegs;
3088 SDValue ALVal;
3089 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3090 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3091 LiveGPRs.push_back(
3092 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3093 }
// AL is made live-in whenever XMM argument registers exist; its value is
// later passed as an operand of VASTART_SAVE_XMM_REGS.
3094 if (!ArgXMMs.empty()) {
3095 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3096 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3097 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3098 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3099 LiveXMMRegs.push_back(
3100 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3101 }
3102 }
3103
3104 if (IsWin64) {
3105 // Get to the caller-allocated home save location. Add 8 to account
3106 // for the return address.
3107 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3108 FuncInfo->setRegSaveFrameIndex(
3109 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3110 // Fixup to set vararg frame on shadow area (4 x i64).
3111 if (NumIntRegs < 4)
3112 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3113 } else {
3114 // For X86-64, if there are vararg parameters that are passed via
3115 // registers, then we must store them to their spots on the stack so
3116 // they may be loaded by dereferencing the result of va_next.
3117 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3118 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3119 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3120 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3121 }
3122
3123 // Store the integer parameter registers.
3124 SmallVector<SDValue, 8> MemOps;
3125 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3126 getPointerTy(DAG.getDataLayout()));
3127 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3128 for (SDValue Val : LiveGPRs) {
3129 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3130 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3131 SDValue Store =
3132 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3133 MachinePointerInfo::getFixedStack(
3134 DAG.getMachineFunction(),
3135 FuncInfo->getRegSaveFrameIndex(), Offset));
3136 MemOps.push_back(Store);
3137 Offset += 8;
3138 }
3139
3140 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3141 // Now store the XMM (fp + vector) parameter registers.
3142 SmallVector<SDValue, 12> SaveXMMOps;
3143 SaveXMMOps.push_back(Chain);
3144 SaveXMMOps.push_back(ALVal);
3145 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3146 FuncInfo->getRegSaveFrameIndex(), dl));
3147 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3148 FuncInfo->getVarArgsFPOffset(), dl));
3149 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3150 LiveXMMRegs.end());
3151 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3152 MVT::Other, SaveXMMOps));
3153 }
3154
3155 if (!MemOps.empty())
3156 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3157 }
3158
3159 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3160 // Find the largest legal vector type.
3161 MVT VecVT = MVT::Other;
3162 // FIXME: Only some x86_32 calling conventions support AVX512.
3163 if (Subtarget.hasAVX512() &&
3164 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3165 CallConv == CallingConv::Intel_OCL_BI)))
3166 VecVT = MVT::v16f32;
3167 else if (Subtarget.hasAVX())
3168 VecVT = MVT::v8f32;
3169 else if (Subtarget.hasSSE2())
3170 VecVT = MVT::v4f32;
3171
3172 // We forward some GPRs and some vector types.
3173 SmallVector<MVT, 2> RegParmTypes;
3174 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3175 RegParmTypes.push_back(IntVT);
3176 if (VecVT != MVT::Other)
3177 RegParmTypes.push_back(VecVT);
3178
3179 // Compute the set of forwarded registers. The rest are scratch.
3180 SmallVectorImpl<ForwardedRegister> &Forwards =
3181 FuncInfo->getForwardedMustTailRegParms();
3182 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3183
3184 // Conservatively forward AL on x86_64, since it might be used for varargs.
3185 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3186 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3187 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3188 }
3189
3190 // Copy all forwards from physical to virtual registers.
3191 for (ForwardedRegister &F : Forwards) {
3192 // FIXME: Can we use a less constrained schedule?
3193 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3194 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
3195 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
3196 }
3197 }
3198
3199 // Some CCs need callee pop.
3200 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3201 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3202 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3203 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3204 // X86 interrupts must pop the error code (and the alignment padding) if
3205 // present.
3206 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3207 } else {
3208 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3209 // If this is an sret function, the return should pop the hidden pointer.
3210 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3211 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3212 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3213 FuncInfo->setBytesToPopOnReturn(4);
3214 }
3215
3216 if (!Is64Bit) {
3217 // RegSaveFrameIndex is X86-64 only.
// NOTE(review): 0xAAAAAAA looks like a deliberately invalid placeholder
// index for fields that do not apply here — confirm against the readers.
3218 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3219 if (CallConv == CallingConv::X86_FastCall ||
3220 CallConv == CallingConv::X86_ThisCall)
3221 // fastcc functions can't have varargs.
3222 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3223 }
3224
3225 FuncInfo->setArgumentStackSize(StackSize);
3226
3227 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3228 EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
3229 if (Personality == EHPersonality::CoreCLR) {
3230 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3230, __PRETTY_FUNCTION__))
;
3231 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3232 // that we'd prefer this slot be allocated towards the bottom of the frame
3233 // (i.e. near the stack pointer after allocating the frame). Every
3234 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3235 // offset from the bottom of this and each funclet's frame must be the
3236 // same, so the size of funclets' (mostly empty) frames is dictated by
3237 // how far this slot is from the bottom (since they allocate just enough
3238 // space to accommodate holding this slot at the correct offset).
3239 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3240 EHInfo->PSPSymFrameIdx = PSPSymFI;
3241 }
3242 }
3243
// For regcall functions, and for functions carrying the
// "no_caller_saved_registers" attribute, every live-in register is passed to
// disableCalleeSavedRegister.
3244 if (CallConv == CallingConv::X86_RegCall ||
3245 Fn->hasFnAttribute("no_caller_saved_registers")) {
3246 MachineRegisterInfo &MRI = MF.getRegInfo();
3247 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3248 MRI.disableCalleeSavedRegister(Pair.first);
3249 }
3250
3251 return Chain;
3252}
3253
3254SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3255 SDValue Arg, const SDLoc &dl,
3256 SelectionDAG &DAG,
3257 const CCValAssign &VA,
3258 ISD::ArgFlagsTy Flags) const {
3259 unsigned LocMemOffset = VA.getLocMemOffset();
3260 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3261 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3262 StackPtr, PtrOff);
3263 if (Flags.isByVal())
3264 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3265
3266 return DAG.getStore(
3267 Chain, dl, Arg, PtrOff,
3268 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3269}
3270
3271/// Emit a load of return address if tail call
3272/// optimization is performed and it is required.
3273SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3274 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3275 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3276 // Adjust the Return address stack slot.
3277 EVT VT = getPointerTy(DAG.getDataLayout());
3278 OutRetAddr = getReturnAddressFrameIndex(DAG);
3279
3280 // Load the "old" Return address.
3281 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3282 return SDValue(OutRetAddr.getNode(), 1);
3283}
3284
3285/// Emit a store of the return address if tail call
3286/// optimization is performed and it is required (FPDiff!=0).
3287static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3288 SDValue Chain, SDValue RetAddrFrIdx,
3289 EVT PtrVT, unsigned SlotSize,
3290 int FPDiff, const SDLoc &dl) {
3291 // Store the return address to the appropriate stack slot.
3292 if (!FPDiff) return Chain;
3293 // Calculate the new stack slot for the return address.
3294 int NewReturnAddrFI =
3295 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3296 false);
3297 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3298 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3299 MachinePointerInfo::getFixedStack(
3300 DAG.getMachineFunction(), NewReturnAddrFI));
3301 return Chain;
3302}
3303
3304/// Returns a vector_shuffle mask for an movs{s|d}, movd
3305/// operation of specified width.
3306static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3307 SDValue V2) {
3308 unsigned NumElems = VT.getVectorNumElements();
3309 SmallVector<int, 8> Mask;
3310 Mask.push_back(NumElems);
3311 for (unsigned i = 1; i != NumElems; ++i)
3312 Mask.push_back(i);
3313 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3314}
3315
3316SDValue
3317X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3318 SmallVectorImpl<SDValue> &InVals) const {
3319 SelectionDAG &DAG = CLI.DAG;
3320 SDLoc &dl = CLI.DL;
3321 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3322 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3323 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3324 SDValue Chain = CLI.Chain;
3325 SDValue Callee = CLI.Callee;
3326 CallingConv::ID CallConv = CLI.CallConv;
3327 bool &isTailCall = CLI.IsTailCall;
3328 bool isVarArg = CLI.IsVarArg;
3329
3330 MachineFunction &MF = DAG.getMachineFunction();
3331 bool Is64Bit = Subtarget.is64Bit();
3332 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3333 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3334 bool IsSibcall = false;
3335 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3336 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
3337 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3338 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3339 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3340 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3341
3342 if (CallConv == CallingConv::X86_INTR)
3343 report_fatal_error("X86 interrupts may not be called directly");
3344
3345 if (Attr.getValueAsString() == "true")
3346 isTailCall = false;
3347
3348 if (Subtarget.isPICStyleGOT() &&
3349 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3350 // If we are using a GOT, disable tail calls to external symbols with
3351 // default visibility. Tail calling such a symbol requires using a GOT
3352 // relocation, which forces early binding of the symbol. This breaks code
3353 // that require lazy function symbol resolution. Using musttail or
3354 // GuaranteedTailCallOpt will override this.
3355 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3356 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3357 G->getGlobal()->hasDefaultVisibility()))
3358 isTailCall = false;
3359 }
3360
3361 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3362 if (IsMustTail) {
3363 // Force this to be a tail call. The verifier rules are enough to ensure
3364 // that we can lower this successfully without moving the return address
3365 // around.
3366 isTailCall = true;
3367 } else if (isTailCall) {
3368 // Check if it's really possible to do a tail call.
3369 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3370 isVarArg, SR != NotStructReturn,
3371 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
3372 Outs, OutVals, Ins, DAG);
3373
3374 // Sibcalls are automatically detected tailcalls which do not require
3375 // ABI changes.
3376 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3377 IsSibcall = true;
3378
3379 if (isTailCall)
3380 ++NumTailCalls;
3381 }
3382
3383 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3384, __PRETTY_FUNCTION__))
3384 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3384, __PRETTY_FUNCTION__))
;
3385
3386 // Analyze operands of the call, assigning locations to each operand.
3387 SmallVector<CCValAssign, 16> ArgLocs;
3388 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3389
3390 // Allocate shadow area for Win64.
3391 if (IsWin64)
3392 CCInfo.AllocateStack(32, 8);
3393
3394 CCInfo.AnalyzeArguments(Outs, CC_X86);
3395
3396 // In vectorcall calling convention a second pass is required for the HVA
3397 // types.
3398 if (CallingConv::X86_VectorCall == CallConv) {
3399 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3400 }
3401
3402 // Get a count of how many bytes are to be pushed on the stack.
3403 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3404 if (IsSibcall)
3405 // This is a sibcall. The memory operands are available in caller's
3406 // own caller's stack.
3407 NumBytes = 0;
3408 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3409 canGuaranteeTCO(CallConv))
3410 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3411
3412 int FPDiff = 0;
3413 if (isTailCall && !IsSibcall && !IsMustTail) {
3414 // Lower arguments at fp - stackoffset + fpdiff.
3415 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3416
3417 FPDiff = NumBytesCallerPushed - NumBytes;
3418
3419 // Set the delta of movement of the returnaddr stackslot.
3420 // But only set if delta is greater than previous delta.
3421 if (FPDiff < X86Info->getTCReturnAddrDelta())
3422 X86Info->setTCReturnAddrDelta(FPDiff);
3423 }
3424
3425 unsigned NumBytesToPush = NumBytes;
3426 unsigned NumBytesToPop = NumBytes;
3427
3428 // If we have an inalloca argument, all stack space has already been allocated
3429 // for us and be right at the top of the stack. We don't support multiple
3430 // arguments passed in memory when using inalloca.
3431 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3432 NumBytesToPush = 0;
3433 if (!ArgLocs.back().isMemLoc())
3434 report_fatal_error("cannot use inalloca attribute on a register "
3435 "parameter");
3436 if (ArgLocs.back().getLocMemOffset() != 0)
3437 report_fatal_error("any parameter with the inalloca attribute must be "
3438 "the only memory argument");
3439 }
3440
3441 if (!IsSibcall)
3442 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3443 NumBytes - NumBytesToPush, dl);
3444
3445 SDValue RetAddrFrIdx;
3446 // Load return address for tail calls.
3447 if (isTailCall && FPDiff)
3448 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3449 Is64Bit, FPDiff, dl);
3450
3451 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3452 SmallVector<SDValue, 8> MemOpChains;
3453 SDValue StackPtr;
3454
3455 // The next loop assumes that the locations are in the same order of the
3456 // input arguments.
3457 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3458, __PRETTY_FUNCTION__))
3458 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3458, __PRETTY_FUNCTION__))
;
3459
3460 // Walk the register/memloc assignments, inserting copies/loads. In the case
3461 // of tail call optimization arguments are handled later.
3462 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3463 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3464 ++I, ++OutIndex) {
3465 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3465, __PRETTY_FUNCTION__))
;
3466 // Skip inalloca arguments, they have already been written.
3467 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3468 if (Flags.isInAlloca())
3469 continue;
3470
3471 CCValAssign &VA = ArgLocs[I];
3472 EVT RegVT = VA.getLocVT();
3473 SDValue Arg = OutVals[OutIndex];
3474 bool isByVal = Flags.isByVal();
3475
3476 // Promote the value if needed.
3477 switch (VA.getLocInfo()) {
3478 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3478)
;
3479 case CCValAssign::Full: break;
3480 case CCValAssign::SExt:
3481 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3482 break;
3483 case CCValAssign::ZExt:
3484 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3485 break;
3486 case CCValAssign::AExt:
3487 if (Arg.getValueType().isVector() &&
3488 Arg.getValueType().getVectorElementType() == MVT::i1)
3489 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3490 else if (RegVT.is128BitVector()) {
3491 // Special case: passing MMX values in XMM registers.
3492 Arg = DAG.getBitcast(MVT::i64, Arg);
3493 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3494 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3495 } else
3496 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3497 break;
3498 case CCValAssign::BCvt:
3499 Arg = DAG.getBitcast(RegVT, Arg);
3500 break;
3501 case CCValAssign::Indirect: {
3502 // Store the argument.
3503 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3504 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3505 Chain = DAG.getStore(
3506 Chain, dl, Arg, SpillSlot,
3507 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3508 Arg = SpillSlot;
3509 break;
3510 }
3511 }
3512
3513 if (VA.needsCustom()) {
3514 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3515, __PRETTY_FUNCTION__))
3515 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3515, __PRETTY_FUNCTION__))
;
3516 // Split v64i1 value into two registers
3517 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3518 Subtarget);
3519 } else if (VA.isRegLoc()) {
3520 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3521 if (isVarArg && IsWin64) {
3522 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3523 // shadow reg if callee is a varargs function.
3524 unsigned ShadowReg = 0;
3525 switch (VA.getLocReg()) {
3526 case X86::XMM0: ShadowReg = X86::RCX; break;
3527 case X86::XMM1: ShadowReg = X86::RDX; break;
3528 case X86::XMM2: ShadowReg = X86::R8; break;
3529 case X86::XMM3: ShadowReg = X86::R9; break;
3530 }
3531 if (ShadowReg)
3532 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3533 }
3534 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3535 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3535, __PRETTY_FUNCTION__))
;
3536 if (!StackPtr.getNode())
3537 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3538 getPointerTy(DAG.getDataLayout()));
3539 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3540 dl, DAG, VA, Flags));
3541 }
3542 }
3543
3544 if (!MemOpChains.empty())
3545 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3546
3547 if (Subtarget.isPICStyleGOT()) {
3548 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3549 // GOT pointer.
3550 if (!isTailCall) {
3551 RegsToPass.push_back(std::make_pair(
3552 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3553 getPointerTy(DAG.getDataLayout()))));
3554 } else {
3555 // If we are tail calling and generating PIC/GOT style code load the
3556 // address of the callee into ECX. The value in ecx is used as target of
3557 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3558 // for tail calls on PIC/GOT architectures. Normally we would just put the
3559 // address of GOT into ebx and then call target@PLT. But for tail calls
3560 // ebx would be restored (since ebx is callee saved) before jumping to the
3561 // target@PLT.
3562
3563 // Note: The actual moving to ECX is done further down.
3564 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3565 if (G && !G->getGlobal()->hasLocalLinkage() &&
3566 G->getGlobal()->hasDefaultVisibility())
3567 Callee = LowerGlobalAddress(Callee, DAG);
3568 else if (isa<ExternalSymbolSDNode>(Callee))
3569 Callee = LowerExternalSymbol(Callee, DAG);
3570 }
3571 }
3572
3573 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3574 // From AMD64 ABI document:
3575 // For calls that may call functions that use varargs or stdargs
3576 // (prototype-less calls or calls to functions containing ellipsis (...) in
3577 // the declaration) %al is used as hidden argument to specify the number
3578 // of SSE registers used. The contents of %al do not need to match exactly
3579 // the number of registers, but must be an upper bound on the number of SSE
3580 // registers used and is in the range 0 - 8 inclusive.
3581
3582 // Count the number of XMM registers allocated.
3583 static const MCPhysReg XMMArgRegs[] = {
3584 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3585 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3586 };
3587 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3588 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3589, __PRETTY_FUNCTION__))
3589 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3589, __PRETTY_FUNCTION__))
;
3590
3591 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3592 DAG.getConstant(NumXMMRegs, dl,
3593 MVT::i8)));
3594 }
3595
3596 if (isVarArg && IsMustTail) {
3597 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3598 for (const auto &F : Forwards) {
3599 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3600 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3601 }
3602 }
3603
3604 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3605 // don't need this because the eligibility check rejects calls that require
3606 // shuffling arguments passed in memory.
3607 if (!IsSibcall && isTailCall) {
3608 // Force all the incoming stack arguments to be loaded from the stack
3609 // before any new outgoing arguments are stored to the stack, because the
3610 // outgoing stack slots may alias the incoming argument stack slots, and
3611 // the alias isn't otherwise explicit. This is slightly more conservative
3612 // than necessary, because it means that each store effectively depends
3613 // on every argument instead of just those arguments it would clobber.
3614 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3615
3616 SmallVector<SDValue, 8> MemOpChains2;
3617 SDValue FIN;
3618 int FI = 0;
3619 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3620 ++I, ++OutsIndex) {
3621 CCValAssign &VA = ArgLocs[I];
3622
3623 if (VA.isRegLoc()) {
3624 if (VA.needsCustom()) {
3625 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3626, __PRETTY_FUNCTION__))
3626 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3626, __PRETTY_FUNCTION__))
;
3627 // This means that we are in special case where one argument was
3628 // passed through two register locations - Skip the next location
3629 ++I;
3630 }
3631
3632 continue;
3633 }
3634
3635 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3635, __PRETTY_FUNCTION__))
;
3636 SDValue Arg = OutVals[OutsIndex];
3637 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3638 // Skip inalloca arguments. They don't require any work.
3639 if (Flags.isInAlloca())
3640 continue;
3641 // Create frame index.
3642 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3643 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3644 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3645 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3646
3647 if (Flags.isByVal()) {
3648 // Copy relative to framepointer.
3649 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3650 if (!StackPtr.getNode())
3651 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3652 getPointerTy(DAG.getDataLayout()));
3653 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3654 StackPtr, Source);
3655
3656 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3657 ArgChain,
3658 Flags, DAG, dl));
3659 } else {
3660 // Store relative to framepointer.
3661 MemOpChains2.push_back(DAG.getStore(
3662 ArgChain, dl, Arg, FIN,
3663 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3664 }
3665 }
3666
3667 if (!MemOpChains2.empty())
3668 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3669
3670 // Store the return address to the appropriate stack slot.
3671 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3672 getPointerTy(DAG.getDataLayout()),
3673 RegInfo->getSlotSize(), FPDiff, dl);
3674 }
3675
3676 // Build a sequence of copy-to-reg nodes chained together with token chain
3677 // and flag operands which copy the outgoing args into registers.
3678 SDValue InFlag;
3679 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3680 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3681 RegsToPass[i].second, InFlag);
3682 InFlag = Chain.getValue(1);
3683 }
3684
3685 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3686 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3686, __PRETTY_FUNCTION__))
;
3687 // In the 64-bit large code model, we have to make all calls
3688 // through a register, since the call instruction's 32-bit
3689 // pc-relative offset may not be large enough to hold the whole
3690 // address.
3691 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3692 // If the callee is a GlobalAddress node (quite common, every direct call
3693 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3694 // it.
3695 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3696
3697 // We should use extra load for direct calls to dllimported functions in
3698 // non-JIT mode.
3699 const GlobalValue *GV = G->getGlobal();
3700 if (!GV->hasDLLImportStorageClass()) {
3701 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
3702
3703 Callee = DAG.getTargetGlobalAddress(
3704 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3705
3706 if (OpFlags == X86II::MO_GOTPCREL) {
3707 // Add a wrapper.
3708 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3709 getPointerTy(DAG.getDataLayout()), Callee);
3710 // Add extra indirection
3711 Callee = DAG.getLoad(
3712 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3713 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3714 }
3715 }
3716 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3717 const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
3718 unsigned char OpFlags =
3719 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
3720
3721 Callee = DAG.getTargetExternalSymbol(
3722 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3723 } else if (Subtarget.isTarget64BitILP32() &&
3724 Callee->getValueType(0) == MVT::i32) {
3725 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3726 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3727 }
3728
3729 // Returns a chain & a flag for retval copy to use.
3730 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3731 SmallVector<SDValue, 8> Ops;
3732
3733 if (!IsSibcall && isTailCall) {
3734 Chain = DAG.getCALLSEQ_END(Chain,
3735 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3736 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3737 InFlag = Chain.getValue(1);
3738 }
3739
3740 Ops.push_back(Chain);
3741 Ops.push_back(Callee);
3742
3743 if (isTailCall)
3744 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3745
3746 // Add argument registers to the end of the list so that they are known live
3747 // into the call.
3748 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3749 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3750 RegsToPass[i].second.getValueType()));
3751
3752 // Add a register mask operand representing the call-preserved registers.
3753 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
3754 // set X86_INTR calling convention because it has the same CSR mask
3755 // (same preserved registers).
3756 const uint32_t *Mask = RegInfo->getCallPreservedMask(
3757 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
3758 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3758, __PRETTY_FUNCTION__))
;
3759
3760 // If this is an invoke in a 32-bit function using a funclet-based
3761 // personality, assume the function clobbers all registers. If an exception
3762 // is thrown, the runtime will not restore CSRs.
3763 // FIXME: Model this more precisely so that we can register allocate across
3764 // the normal edge and spill and fill across the exceptional edge.
3765 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
3766 const Function *CallerFn = MF.getFunction();
3767 EHPersonality Pers =
3768 CallerFn->hasPersonalityFn()
3769 ? classifyEHPersonality(CallerFn->getPersonalityFn())
3770 : EHPersonality::Unknown;
3771 if (isFuncletEHPersonality(Pers))
3772 Mask = RegInfo->getNoPreservedMask();
3773 }
3774
3775 // Define a new register mask from the existing mask.
3776 uint32_t *RegMask = nullptr;
3777
3778 // In some calling conventions we need to remove the used physical registers
3779 // from the reg mask.
3780 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
3781 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3782
3783 // Allocate a new Reg Mask and copy Mask.
3784 RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
3785 unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
3786 memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
3787
3788 // Make sure all sub registers of the argument registers are reset
3789 // in the RegMask.
3790 for (auto const &RegPair : RegsToPass)
3791 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
3792 SubRegs.isValid(); ++SubRegs)
3793 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3794
3795 // Create the RegMask Operand according to our updated mask.
3796 Ops.push_back(DAG.getRegisterMask(RegMask));
3797 } else {
3798 // Create the RegMask Operand according to the static mask.
3799 Ops.push_back(DAG.getRegisterMask(Mask));
3800 }
3801
3802 if (InFlag.getNode())
3803 Ops.push_back(InFlag);
3804
3805 if (isTailCall) {
3806 // We used to do:
3807 //// If this is the first return lowered for this function, add the regs
3808 //// to the liveout set for the function.
3809 // This isn't right, although it's probably harmless on x86; liveouts
3810 // should be computed from returns not tail calls. Consider a void
3811 // function making a tail call to a function returning int.
3812 MF.getFrameInfo().setHasTailCall();
3813 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
3814 }
3815
3816 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3817 InFlag = Chain.getValue(1);
3818
3819 // Create the CALLSEQ_END node.
3820 unsigned NumBytesForCalleeToPop;
3821 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3822 DAG.getTarget().Options.GuaranteedTailCallOpt))
3823 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3824 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3825 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3826 SR == StackStructReturn)
3827 // If this is a call to a struct-return function, the callee
3828 // pops the hidden struct pointer, so we have to push it back.
3829 // This is common for Darwin/X86, Linux & Mingw32 targets.
3830 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3831 NumBytesForCalleeToPop = 4;
3832 else
3833 NumBytesForCalleeToPop = 0; // Callee pops nothing.
3834
3835 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
3836 // No need to reset the stack after the call if the call doesn't return. To
3837 // make the MI verify, we'll pretend the callee does it for us.
3838 NumBytesForCalleeToPop = NumBytes;
3839 }
3840
3841 // Returns a flag for retval copy to use.
3842 if (!IsSibcall) {
3843 Chain = DAG.getCALLSEQ_END(Chain,
3844 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3845 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
3846 true),
3847 InFlag, dl);
3848 InFlag = Chain.getValue(1);
3849 }
3850
3851 // Handle result values, copying them out of physregs into vregs that we
3852 // return.
3853 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
3854 InVals, RegMask);
3855}
3856
3857//===----------------------------------------------------------------------===//
3858// Fast Calling Convention (tail call) implementation
3859//===----------------------------------------------------------------------===//
3860
3861// Like stdcall, the callee cleans up the arguments, except that ECX is
3862// reserved for storing the tail called function address. Only 2 registers are
3863// free for argument passing (inreg). Tail call optimization is performed
3864// provided:
3865// * tailcallopt is enabled
3866// * caller/callee are fastcc
3867// On X86_64 architecture with GOT-style position independent code only local
3868// (within module) calls are supported at the moment.
3869// To keep the stack aligned according to platform abi the function
3870// GetAlignedArgumentStackSize ensures that argument delta is always multiples
3871// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
3872// If a tail-called callee has more arguments than the caller, the
3873// caller needs to make sure that there is room to move the RETADDR to. This is
3874// achieved by reserving an area the size of the argument delta right after the
3875// original RETADDR, but before the saved framepointer or the spilled registers
3876// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
3877// stack layout:
3878// arg1
3879// arg2
3880// RETADDR
3881// [ new RETADDR
3882// move area ]
3883// (possible EBP)
3884// ESI
3885// EDI
3886// local1 ..
3887
3888/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
3889/// requirement.
3890unsigned
3891X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
3892 SelectionDAG& DAG) const {
3893 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3894 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3895 unsigned StackAlignment = TFI.getStackAlignment();
3896 uint64_t AlignMask = StackAlignment - 1;
3897 int64_t Offset = StackSize;
3898 unsigned SlotSize = RegInfo->getSlotSize();
3899 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
3900 // Number smaller than 12 so just add the difference.
3901 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
3902 } else {
3903 // Mask out lower bits, add stackalignment once plus the 12 bytes.
3904 Offset = ((~AlignMask) & Offset) + StackAlignment +
3905 (StackAlignment-SlotSize);
3906 }
3907 return Offset;
3908}
3909
3910/// Return true if the given stack call argument is already available in the
3911/// same position (relatively) of the caller's incoming argument stack.
3912static
3913bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
3914 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
3915 const X86InstrInfo *TII, const CCValAssign &VA) {
3916 unsigned Bytes = Arg.getValueSizeInBits() / 8;
3917
3918 for (;;) {
3919 // Look through nodes that don't alter the bits of the incoming value.
3920 unsigned Op = Arg.getOpcode();
3921 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
3922 Arg = Arg.getOperand(0);
3923 continue;
3924 }
3925 if (Op == ISD::TRUNCATE) {
3926 const SDValue &TruncInput = Arg.getOperand(0);
3927 if (TruncInput.getOpcode() == ISD::AssertZext &&
3928 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
3929 Arg.getValueType()) {
3930 Arg = TruncInput.getOperand(0);
3931 continue;
3932 }
3933 }
3934 break;
3935 }
3936
3937 int FI = INT_MAX2147483647;
3938 if (Arg.getOpcode() == ISD::CopyFromReg) {
3939 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
3940 if (!TargetRegisterInfo::isVirtualRegister(VR))
3941 return false;
3942 MachineInstr *Def = MRI->getVRegDef(VR);
3943 if (!Def)
3944 return false;
3945 if (!Flags.isByVal()) {
3946 if (!TII->isLoadFromStackSlot(*Def, FI))
3947 return false;
3948 } else {
3949 unsigned Opcode = Def->getOpcode();
3950 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
3951 Opcode == X86::LEA64_32r) &&
3952 Def->getOperand(1).isFI()) {
3953 FI = Def->getOperand(1).getIndex();
3954 Bytes = Flags.getByValSize();
3955 } else
3956 return false;
3957 }
3958 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
3959 if (Flags.isByVal())
3960 // ByVal argument is passed in as a pointer but it's now being
3961 // dereferenced. e.g.
3962 // define @foo(%struct.X* %A) {
3963 // tail call @bar(%struct.X* byval %A)
3964 // }
3965 return false;
3966 SDValue Ptr = Ld->getBasePtr();
3967 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
3968 if (!FINode)
3969 return false;
3970 FI = FINode->getIndex();
3971 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
3972 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
3973 FI = FINode->getIndex();
3974 Bytes = Flags.getByValSize();
3975 } else
3976 return false;
3977
3978 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 3978, __PRETTY_FUNCTION__))
;
3979 if (!MFI.isFixedObjectIndex(FI))
3980 return false;
3981
3982 if (Offset != MFI.getObjectOffset(FI))
3983 return false;
3984
3985 // If this is not byval, check that the argument stack object is immutable.
3986 // inalloca and argument copy elision can create mutable argument stack
3987 // objects. Byval objects can be mutated, but a byval call intends to pass the
3988 // mutated memory.
3989 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
3990 return false;
3991
3992 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
3993 // If the argument location is wider than the argument type, check that any
3994 // extension flags match.
3995 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
3996 Flags.isSExt() != MFI.isObjectSExt(FI)) {
3997 return false;
3998 }
3999 }
4000
4001 return Bytes == MFI.getObjectSize(FI);
4002}
4003
4004/// Check whether the call is eligible for tail call optimization. Targets
4005/// that want to do tail call optimization should implement this function.
4006bool X86TargetLowering::IsEligibleForTailCallOptimization(
4007 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4008 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4009 const SmallVectorImpl<ISD::OutputArg> &Outs,
4010 const SmallVectorImpl<SDValue> &OutVals,
4011 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4012 if (!mayTailCallThisCC(CalleeCC))
4013 return false;
4014
4015 // If -tailcallopt is specified, make fastcc functions tail-callable.
4016 MachineFunction &MF = DAG.getMachineFunction();
4017 const Function *CallerF = MF.getFunction();
4018
4019 // If the function return type is x86_fp80 and the callee return type is not,
4020 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4021 // perform a tailcall optimization here.
4022 if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4023 return false;
4024
4025 CallingConv::ID CallerCC = CallerF->getCallingConv();
4026 bool CCMatch = CallerCC == CalleeCC;
4027 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4028 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4029
4030 // Win64 functions have extra shadow space for argument homing. Don't do the
4031 // sibcall if the caller and callee have mismatched expectations for this
4032 // space.
4033 if (IsCalleeWin64 != IsCallerWin64)
4034 return false;
4035
4036 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4037 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4038 return true;
4039 return false;
4040 }
4041
4042 // Look for obvious safe cases to perform tail call optimization that do not
4043 // require ABI changes. This is what gcc calls sibcall.
4044
4045 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4046 // emit a special epilogue.
4047 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4048 if (RegInfo->needsStackRealignment(MF))
4049 return false;
4050
4051 // Also avoid sibcall optimization if either caller or callee uses struct
4052 // return semantics.
4053 if (isCalleeStructRet || isCallerStructRet)
4054 return false;
4055
4056 // Do not sibcall optimize vararg calls unless all arguments are passed via
4057 // registers.
4058 LLVMContext &C = *DAG.getContext();
4059 if (isVarArg && !Outs.empty()) {
4060 // Optimizing for varargs on Win64 is unlikely to be safe without
4061 // additional testing.
4062 if (IsCalleeWin64 || IsCallerWin64)
4063 return false;
4064
4065 SmallVector<CCValAssign, 16> ArgLocs;
4066 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4067
4068 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4069 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4070 if (!ArgLocs[i].isRegLoc())
4071 return false;
4072 }
4073
4074 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4075 // stack. Therefore, if it's not used by the call it is not safe to optimize
4076 // this into a sibcall.
4077 bool Unused = false;
4078 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4079 if (!Ins[i].Used) {
4080 Unused = true;
4081 break;
4082 }
4083 }
4084 if (Unused) {
4085 SmallVector<CCValAssign, 16> RVLocs;
4086 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4087 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4088 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4089 CCValAssign &VA = RVLocs[i];
4090 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4091 return false;
4092 }
4093 }
4094
4095 // Check that the call results are passed in the same way.
4096 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4097 RetCC_X86, RetCC_X86))
4098 return false;
4099 // The callee has to preserve all registers the caller needs to preserve.
4100 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4101 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4102 if (!CCMatch) {
4103 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4104 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4105 return false;
4106 }
4107
4108 unsigned StackArgsSize = 0;
4109
4110 // If the callee takes no arguments then go on to check the results of the
4111 // call.
4112 if (!Outs.empty()) {
4113 // Check if stack adjustment is needed. For now, do not do this if any
4114 // argument is passed on the stack.
4115 SmallVector<CCValAssign, 16> ArgLocs;
4116 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4117
4118 // Allocate shadow area for Win64
4119 if (IsCalleeWin64)
4120 CCInfo.AllocateStack(32, 8);
4121
4122 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4123 StackArgsSize = CCInfo.getNextStackOffset();
4124
4125 if (CCInfo.getNextStackOffset()) {
4126 // Check if the arguments are already laid out in the right way as
4127 // the caller's fixed stack objects.
4128 MachineFrameInfo &MFI = MF.getFrameInfo();
4129 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4130 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4131 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4132 CCValAssign &VA = ArgLocs[i];
4133 SDValue Arg = OutVals[i];
4134 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4135 if (VA.getLocInfo() == CCValAssign::Indirect)
4136 return false;
4137 if (!VA.isRegLoc()) {
4138 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4139 MFI, MRI, TII, VA))
4140 return false;
4141 }
4142 }
4143 }
4144
4145 bool PositionIndependent = isPositionIndependent();
4146 // If the tailcall address may be in a register, then make sure it's
4147 // possible to register allocate for it. In 32-bit, the call address can
4148 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4149 // callee-saved registers are restored. These happen to be the same
4150 // registers used to pass 'inreg' arguments so watch out for those.
4151 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4152 !isa<ExternalSymbolSDNode>(Callee)) ||
4153 PositionIndependent)) {
4154 unsigned NumInRegs = 0;
4155 // In PIC we need an extra register to formulate the address computation
4156 // for the callee.
4157 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4158
4159 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4160 CCValAssign &VA = ArgLocs[i];
4161 if (!VA.isRegLoc())
4162 continue;
4163 unsigned Reg = VA.getLocReg();
4164 switch (Reg) {
4165 default: break;
4166 case X86::EAX: case X86::EDX: case X86::ECX:
4167 if (++NumInRegs == MaxInRegs)
4168 return false;
4169 break;
4170 }
4171 }
4172 }
4173
4174 const MachineRegisterInfo &MRI = MF.getRegInfo();
4175 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4176 return false;
4177 }
4178
4179 bool CalleeWillPop =
4180 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4181 MF.getTarget().Options.GuaranteedTailCallOpt);
4182
4183 if (unsigned BytesToPop =
4184 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4185 // If we have bytes to pop, the callee must pop them.
4186 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4187 if (!CalleePopMatches)
4188 return false;
4189 } else if (CalleeWillPop && StackArgsSize > 0) {
4190 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4191 return false;
4192 }
4193
4194 return true;
4195}
4196
4197FastISel *
4198X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4199 const TargetLibraryInfo *libInfo) const {
4200 return X86::createFastISel(funcInfo, libInfo);
4201}
4202
4203//===----------------------------------------------------------------------===//
4204// Other Lowering Hooks
4205//===----------------------------------------------------------------------===//
4206
4207static bool MayFoldLoad(SDValue Op) {
4208 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4209}
4210
4211static bool MayFoldIntoStore(SDValue Op) {
4212 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4213}
4214
4215static bool MayFoldIntoZeroExtend(SDValue Op) {
4216 if (Op.hasOneUse()) {
4217 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4218 return (ISD::ZERO_EXTEND == Opcode);
4219 }
4220 return false;
4221}
4222
4223static bool isTargetShuffle(unsigned Opcode) {
4224 switch(Opcode) {
4225 default: return false;
4226 case X86ISD::BLENDI:
4227 case X86ISD::PSHUFB:
4228 case X86ISD::PSHUFD:
4229 case X86ISD::PSHUFHW:
4230 case X86ISD::PSHUFLW:
4231 case X86ISD::SHUFP:
4232 case X86ISD::INSERTPS:
4233 case X86ISD::EXTRQI:
4234 case X86ISD::INSERTQI:
4235 case X86ISD::PALIGNR:
4236 case X86ISD::VSHLDQ:
4237 case X86ISD::VSRLDQ:
4238 case X86ISD::MOVLHPS:
4239 case X86ISD::MOVHLPS:
4240 case X86ISD::MOVLPS:
4241 case X86ISD::MOVLPD:
4242 case X86ISD::MOVSHDUP:
4243 case X86ISD::MOVSLDUP:
4244 case X86ISD::MOVDDUP:
4245 case X86ISD::MOVSS:
4246 case X86ISD::MOVSD:
4247 case X86ISD::UNPCKL:
4248 case X86ISD::UNPCKH:
4249 case X86ISD::VBROADCAST:
4250 case X86ISD::VPERMILPI:
4251 case X86ISD::VPERMILPV:
4252 case X86ISD::VPERM2X128:
4253 case X86ISD::VPERMIL2:
4254 case X86ISD::VPERMI:
4255 case X86ISD::VPPERM:
4256 case X86ISD::VPERMV:
4257 case X86ISD::VPERMV3:
4258 case X86ISD::VPERMIV3:
4259 case X86ISD::VZEXT_MOVL:
4260 return true;
4261 }
4262}
4263
4264static bool isTargetShuffleVariableMask(unsigned Opcode) {
4265 switch (Opcode) {
4266 default: return false;
4267 // Target Shuffles.
4268 case X86ISD::PSHUFB:
4269 case X86ISD::VPERMILPV:
4270 case X86ISD::VPERMIL2:
4271 case X86ISD::VPPERM:
4272 case X86ISD::VPERMV:
4273 case X86ISD::VPERMV3:
4274 case X86ISD::VPERMIV3:
4275 return true;
4276 // 'Faux' Target Shuffles.
4277 case ISD::AND:
4278 case X86ISD::ANDNP:
4279 return true;
4280 }
4281}
4282
4283SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4284 MachineFunction &MF = DAG.getMachineFunction();
4285 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4286 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4287 int ReturnAddrIndex = FuncInfo->getRAIndex();
4288
4289 if (ReturnAddrIndex == 0) {
4290 // Set up a frame object for the return address.
4291 unsigned SlotSize = RegInfo->getSlotSize();
4292 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4293 -(int64_t)SlotSize,
4294 false);
4295 FuncInfo->setRAIndex(ReturnAddrIndex);
4296 }
4297
4298 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4299}
4300
4301bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4302 bool hasSymbolicDisplacement) {
4303 // Offset should fit into 32 bit immediate field.
4304 if (!isInt<32>(Offset))
4305 return false;
4306
4307 // If we don't have a symbolic displacement - we don't have any extra
4308 // restrictions.
4309 if (!hasSymbolicDisplacement)
4310 return true;
4311
4312 // FIXME: Some tweaks might be needed for medium code model.
4313 if (M != CodeModel::Small && M != CodeModel::Kernel)
4314 return false;
4315
4316 // For small code model we assume that latest object is 16MB before end of 31
4317 // bits boundary. We may also accept pretty large negative constants knowing
4318 // that all objects are in the positive half of address space.
4319 if (M == CodeModel::Small && Offset < 16*1024*1024)
4320 return true;
4321
4322 // For kernel code model we know that all object resist in the negative half
4323 // of 32bits address space. We may not accept negative offsets, since they may
4324 // be just off and we may accept pretty large positive ones.
4325 if (M == CodeModel::Kernel && Offset >= 0)
4326 return true;
4327
4328 return false;
4329}
4330
4331/// Determines whether the callee is required to pop its own arguments.
4332/// Callee pop is necessary to support tail calls.
4333bool X86::isCalleePop(CallingConv::ID CallingConv,
4334 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4335 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4336 // can guarantee TCO.
4337 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4338 return true;
4339
4340 switch (CallingConv) {
4341 default:
4342 return false;
4343 case CallingConv::X86_StdCall:
4344 case CallingConv::X86_FastCall:
4345 case CallingConv::X86_ThisCall:
4346 case CallingConv::X86_VectorCall:
4347 return !is64Bit;
4348 }
4349}
4350
4351/// \brief Return true if the condition is an unsigned comparison operation.
4352static bool isX86CCUnsigned(unsigned X86CC) {
4353 switch (X86CC) {
4354 default:
4355 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4355)
;
4356 case X86::COND_E:
4357 case X86::COND_NE:
4358 case X86::COND_B:
4359 case X86::COND_A:
4360 case X86::COND_BE:
4361 case X86::COND_AE:
4362 return true;
4363 case X86::COND_G:
4364 case X86::COND_GE:
4365 case X86::COND_L:
4366 case X86::COND_LE:
4367 return false;
4368 }
4369}
4370
4371static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4372 switch (SetCCOpcode) {
4373 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4373)
;
4374 case ISD::SETEQ: return X86::COND_E;
4375 case ISD::SETGT: return X86::COND_G;
4376 case ISD::SETGE: return X86::COND_GE;
4377 case ISD::SETLT: return X86::COND_L;
4378 case ISD::SETLE: return X86::COND_LE;
4379 case ISD::SETNE: return X86::COND_NE;
4380 case ISD::SETULT: return X86::COND_B;
4381 case ISD::SETUGT: return X86::COND_A;
4382 case ISD::SETULE: return X86::COND_BE;
4383 case ISD::SETUGE: return X86::COND_AE;
4384 }
4385}
4386
4387/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4388/// condition code, returning the condition code and the LHS/RHS of the
4389/// comparison to make.
4390static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4391 bool isFP, SDValue &LHS, SDValue &RHS,
4392 SelectionDAG &DAG) {
4393 if (!isFP) {
4394 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4395 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4396 // X > -1 -> X == 0, jump !sign.
4397 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4398 return X86::COND_NS;
4399 }
4400 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4401 // X < 0 -> X == 0, jump on sign.
4402 return X86::COND_S;
4403 }
4404 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4405 // X < 1 -> X <= 0
4406 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4407 return X86::COND_LE;
4408 }
4409 }
4410
4411 return TranslateIntegerX86CC(SetCCOpcode);
4412 }
4413
4414 // First determine if it is required or is profitable to flip the operands.
4415
4416 // If LHS is a foldable load, but RHS is not, flip the condition.
4417 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4418 !ISD::isNON_EXTLoad(RHS.getNode())) {
4419 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4420 std::swap(LHS, RHS);
4421 }
4422
4423 switch (SetCCOpcode) {
4424 default: break;
4425 case ISD::SETOLT:
4426 case ISD::SETOLE:
4427 case ISD::SETUGT:
4428 case ISD::SETUGE:
4429 std::swap(LHS, RHS);
4430 break;
4431 }
4432
4433 // On a floating point condition, the flags are set as follows:
4434 // ZF PF CF op
4435 // 0 | 0 | 0 | X > Y
4436 // 0 | 0 | 1 | X < Y
4437 // 1 | 0 | 0 | X == Y
4438 // 1 | 1 | 1 | unordered
4439 switch (SetCCOpcode) {
4440 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4440)
;
4441 case ISD::SETUEQ:
4442 case ISD::SETEQ: return X86::COND_E;
4443 case ISD::SETOLT: // flipped
4444 case ISD::SETOGT:
4445 case ISD::SETGT: return X86::COND_A;
4446 case ISD::SETOLE: // flipped
4447 case ISD::SETOGE:
4448 case ISD::SETGE: return X86::COND_AE;
4449 case ISD::SETUGT: // flipped
4450 case ISD::SETULT:
4451 case ISD::SETLT: return X86::COND_B;
4452 case ISD::SETUGE: // flipped
4453 case ISD::SETULE:
4454 case ISD::SETLE: return X86::COND_BE;
4455 case ISD::SETONE:
4456 case ISD::SETNE: return X86::COND_NE;
4457 case ISD::SETUO: return X86::COND_P;
4458 case ISD::SETO: return X86::COND_NP;
4459 case ISD::SETOEQ:
4460 case ISD::SETUNE: return X86::COND_INVALID;
4461 }
4462}
4463
4464/// Is there a floating point cmov for the specific X86 condition code?
4465/// Current x86 isa includes the following FP cmov instructions:
4466/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4467static bool hasFPCMov(unsigned X86CC) {
4468 switch (X86CC) {
4469 default:
4470 return false;
4471 case X86::COND_B:
4472 case X86::COND_BE:
4473 case X86::COND_E:
4474 case X86::COND_P:
4475 case X86::COND_A:
4476 case X86::COND_AE:
4477 case X86::COND_NE:
4478 case X86::COND_NP:
4479 return true;
4480 }
4481}
4482
4483
4484bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4485 const CallInst &I,
4486 unsigned Intrinsic) const {
4487
4488 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4489 if (!IntrData)
4490 return false;
4491
4492 Info.opc = ISD::INTRINSIC_W_CHAIN;
4493 Info.readMem = false;
4494 Info.writeMem = false;
4495 Info.vol = false;
4496 Info.offset = 0;
4497
4498 switch (IntrData->Type) {
4499 case EXPAND_FROM_MEM: {
4500 Info.ptrVal = I.getArgOperand(0);
4501 Info.memVT = MVT::getVT(I.getType());
4502 Info.align = 1;
4503 Info.readMem = true;
4504 break;
4505 }
4506 case COMPRESS_TO_MEM: {
4507 Info.ptrVal = I.getArgOperand(0);
4508 Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
4509 Info.align = 1;
4510 Info.writeMem = true;
4511 break;
4512 }
4513 case TRUNCATE_TO_MEM_VI8:
4514 case TRUNCATE_TO_MEM_VI16:
4515 case TRUNCATE_TO_MEM_VI32: {
4516 Info.ptrVal = I.getArgOperand(0);
4517 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4518 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4519 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4520 ScalarVT = MVT::i8;
4521 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4522 ScalarVT = MVT::i16;
4523 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4524 ScalarVT = MVT::i32;
4525
4526 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4527 Info.align = 1;
4528 Info.writeMem = true;
4529 break;
4530 }
4531 default:
4532 return false;
4533 }
4534
4535 return true;
4536}
4537
4538/// Returns true if the target can instruction select the
4539/// specified FP immediate natively. If false, the legalizer will
4540/// materialize the FP immediate as a load from a constant pool.
4541bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4542 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4543 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4544 return true;
4545 }
4546 return false;
4547}
4548
4549bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4550 ISD::LoadExtType ExtTy,
4551 EVT NewVT) const {
4552 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4553 // relocation target a movq or addq instruction: don't let the load shrink.
4554 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4555 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4556 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4557 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4558 return true;
4559}
4560
4561/// \brief Returns true if it is beneficial to convert a load of a constant
4562/// to just the constant itself.
4563bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4564 Type *Ty) const {
4565 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4565, __PRETTY_FUNCTION__))
;
4566
4567 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4568 if (BitSize == 0 || BitSize > 64)
4569 return false;
4570 return true;
4571}
4572
4573bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4574 // TODO: It might be a win to ease or lift this restriction, but the generic
4575 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4576 if (VT.isVector() && Subtarget.hasAVX512())
4577 return false;
4578
4579 return true;
4580}
4581
4582bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4583 unsigned Index) const {
4584 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4585 return false;
4586
4587 // Mask vectors support all subregister combinations and operations that
4588 // extract half of vector.
4589 if (ResVT.getVectorElementType() == MVT::i1)
4590 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4591 (Index == ResVT.getVectorNumElements()));
4592
4593 return (Index % ResVT.getVectorNumElements()) == 0;
4594}
4595
4596bool X86TargetLowering::isCheapToSpeculateCttz() const {
4597 // Speculate cttz only if we can directly use TZCNT.
4598 return Subtarget.hasBMI();
4599}
4600
4601bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4602 // Speculate ctlz only if we can directly use LZCNT.
4603 return Subtarget.hasLZCNT();
4604}
4605
4606bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
4607 const SelectionDAG &DAG) const {
4608 // Do not merge to float value size (128 bytes) if no implicit
4609 // float attribute is set.
4610 bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute(
4611 Attribute::NoImplicitFloat);
4612
4613 if (NoFloat) {
4614 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
4615 return (MemVT.getSizeInBits() <= MaxIntSize);
4616 }
4617 return true;
4618}
4619
4620bool X86TargetLowering::isCtlzFast() const {
4621 return Subtarget.hasFastLZCNT();
4622}
4623
4624bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4625 const Instruction &AndI) const {
4626 return true;
4627}
4628
4629bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4630 if (!Subtarget.hasBMI())
4631 return false;
4632
4633 // There are only 32-bit and 64-bit forms for 'andn'.
4634 EVT VT = Y.getValueType();
4635 if (VT != MVT::i32 && VT != MVT::i64)
4636 return false;
4637
4638 return true;
4639}
4640
4641MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
4642 MVT VT = MVT::getIntegerVT(NumBits);
4643 if (isTypeLegal(VT))
4644 return VT;
4645
4646 // PMOVMSKB can handle this.
4647 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
4648 return MVT::v16i8;
4649
4650 // VPMOVMSKB can handle this.
4651 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
4652 return MVT::v32i8;
4653
4654 // TODO: Allow 64-bit type for 32-bit target.
4655 // TODO: 512-bit types should be allowed, but make sure that those
4656 // cases are handled in combineVectorSizedSetCCEquality().
4657
4658 return MVT::INVALID_SIMPLE_VALUE_TYPE;
4659}
4660
4661/// Val is the undef sentinel value or equal to the specified value.
4662static bool isUndefOrEqual(int Val, int CmpVal) {
4663 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
4664}
4665
4666/// Val is either the undef or zero sentinel value.
4667static bool isUndefOrZero(int Val) {
4668 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
4669}
4670
4671/// Return true if every element in Mask, beginning
4672/// from position Pos and ending in Pos+Size is the undef sentinel value.
4673static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
4674 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4675 if (Mask[i] != SM_SentinelUndef)
4676 return false;
4677 return true;
4678}
4679
4680/// Return true if Val is undef or if its value falls within the
4681/// specified range (L, H].
4682static bool isUndefOrInRange(int Val, int Low, int Hi) {
4683 return (Val == SM_SentinelUndef) || (Val >= Low && Val < Hi);
4684}
4685
4686/// Return true if every element in Mask is undef or if its value
4687/// falls within the specified range (L, H].
4688static bool isUndefOrInRange(ArrayRef<int> Mask,
4689 int Low, int Hi) {
4690 for (int M : Mask)
4691 if (!isUndefOrInRange(M, Low, Hi))
4692 return false;
4693 return true;
4694}
4695
4696/// Return true if Val is undef, zero or if its value falls within the
4697/// specified range (L, H].
4698static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
4699 return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
4700}
4701
4702/// Return true if every element in Mask is undef, zero or if its value
4703/// falls within the specified range (L, H].
4704static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
4705 for (int M : Mask)
4706 if (!isUndefOrZeroOrInRange(M, Low, Hi))
4707 return false;
4708 return true;
4709}
4710
4711/// Return true if every element in Mask, beginning
4712/// from position Pos and ending in Pos+Size, falls within the specified
4713/// sequential range (Low, Low+Size]. or is undef.
4714static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
4715 unsigned Pos, unsigned Size, int Low) {
4716 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
4717 if (!isUndefOrEqual(Mask[i], Low))
4718 return false;
4719 return true;
4720}
4721
4722/// Return true if every element in Mask, beginning
4723/// from position Pos and ending in Pos+Size, falls within the specified
4724/// sequential range (Low, Low+Size], or is undef or is zero.
4725static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4726 unsigned Size, int Low) {
4727 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
4728 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
4729 return false;
4730 return true;
4731}
4732
4733/// Return true if every element in Mask, beginning
4734/// from position Pos and ending in Pos+Size is undef or is zero.
4735static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4736 unsigned Size) {
4737 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4738 if (!isUndefOrZero(Mask[i]))
4739 return false;
4740 return true;
4741}
4742
4743/// \brief Helper function to test whether a shuffle mask could be
4744/// simplified by widening the elements being shuffled.
4745///
4746/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
4747/// leaves it in an unspecified state.
4748///
4749/// NOTE: This must handle normal vector shuffle masks and *target* vector
4750/// shuffle masks. The latter have the special property of a '-2' representing
4751/// a zero-ed lane of a vector.
4752static bool canWidenShuffleElements(ArrayRef<int> Mask,
4753 SmallVectorImpl<int> &WidenedMask) {
4754 WidenedMask.assign(Mask.size() / 2, 0);
4755 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
4756 int M0 = Mask[i];
4757 int M1 = Mask[i + 1];
4758
4759 // If both elements are undef, its trivial.
4760 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
4761 WidenedMask[i / 2] = SM_SentinelUndef;
4762 continue;
4763 }
4764
4765 // Check for an undef mask and a mask value properly aligned to fit with
4766 // a pair of values. If we find such a case, use the non-undef mask's value.
4767 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
4768 WidenedMask[i / 2] = M1 / 2;
4769 continue;
4770 }
4771 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
4772 WidenedMask[i / 2] = M0 / 2;
4773 continue;
4774 }
4775
4776 // When zeroing, we need to spread the zeroing across both lanes to widen.
4777 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
4778 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
4779 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
4780 WidenedMask[i / 2] = SM_SentinelZero;
4781 continue;
4782 }
4783 return false;
4784 }
4785
4786 // Finally check if the two mask values are adjacent and aligned with
4787 // a pair.
4788 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
4789 WidenedMask[i / 2] = M0 / 2;
4790 continue;
4791 }
4792
4793 // Otherwise we can't safely widen the elements used in this shuffle.
4794 return false;
4795 }
4796 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4797, __PRETTY_FUNCTION__))
4797 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4797, __PRETTY_FUNCTION__))
;
4798
4799 return true;
4800}
4801
4802/// Returns true if Elt is a constant zero or a floating point constant +0.0.
4803bool X86::isZeroNode(SDValue Elt) {
4804 return isNullConstant(Elt) || isNullFPConstant(Elt);
4805}
4806
4807// Build a vector of constants.
4808// Use an UNDEF node if MaskElt == -1.
4809// Split 64-bit constants in the 32-bit mode.
4810static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
4811 const SDLoc &dl, bool IsMask = false) {
4812
4813 SmallVector<SDValue, 32> Ops;
4814 bool Split = false;
4815
4816 MVT ConstVecVT = VT;
4817 unsigned NumElts = VT.getVectorNumElements();
4818 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4819 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4820 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4821 Split = true;
4822 }
4823
4824 MVT EltVT = ConstVecVT.getVectorElementType();
4825 for (unsigned i = 0; i < NumElts; ++i) {
4826 bool IsUndef = Values[i] < 0 && IsMask;
4827 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
4828 DAG.getConstant(Values[i], dl, EltVT);
4829 Ops.push_back(OpNode);
4830 if (Split)
4831 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
4832 DAG.getConstant(0, dl, EltVT));
4833 }
4834 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4835 if (Split)
4836 ConstsNode = DAG.getBitcast(VT, ConstsNode);
4837 return ConstsNode;
4838}
4839
4840static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
4841 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4842 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4843, __PRETTY_FUNCTION__))
4843 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4843, __PRETTY_FUNCTION__))
;
4844 SmallVector<SDValue, 32> Ops;
4845 bool Split = false;
4846
4847 MVT ConstVecVT = VT;
4848 unsigned NumElts = VT.getVectorNumElements();
4849 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4850 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4851 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4852 Split = true;
4853 }
4854
4855 MVT EltVT = ConstVecVT.getVectorElementType();
4856 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
4857 if (Undefs[i]) {
4858 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
4859 continue;
4860 }
4861 const APInt &V = Bits[i];
4862 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4862, __PRETTY_FUNCTION__))
;
4863 if (Split) {
4864 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
4865 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
4866 } else if (EltVT == MVT::f32) {
4867 APFloat FV(APFloat::IEEEsingle(), V);
4868 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4869 } else if (EltVT == MVT::f64) {
4870 APFloat FV(APFloat::IEEEdouble(), V);
4871 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4872 } else {
4873 Ops.push_back(DAG.getConstant(V, dl, EltVT));
4874 }
4875 }
4876
4877 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4878 return DAG.getBitcast(VT, ConstsNode);
4879}
4880
4881/// Returns a vector of specified type with all zero elements.
/// The zero is built in one of three forms and then bitcast to \p VT:
///  - a v4f32 +0.0 constant when SSE2 is unavailable and VT is 128 bits wide,
///  - a constant 0 of VT itself when VT has i1 elements,
///  - otherwise a constant 0 vector of i32 with VT.getSizeInBits() / 32 lanes.
4882static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
4883 SelectionDAG &DAG, const SDLoc &dl) {
4884 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4886, __PRETTY_FUNCTION__))
4885 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4886, __PRETTY_FUNCTION__))
4886 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4886, __PRETTY_FUNCTION__))
;
4887
4888 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
4889 // type. This ensures they get CSE'd. But if the integer type is not
4890 // available, use a floating-point +0.0 instead.
4891 SDValue Vec;
4892 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
4893 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
4894 } else if (VT.getVectorElementType() == MVT::i1) {
// Mask (vXi1) vectors: more than 16 elements requires BWI, fewer than 8
// requires VLX (both enforced only by the asserts below).
4895 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4896, __PRETTY_FUNCTION__))
4896 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4896, __PRETTY_FUNCTION__))
;
4897 assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&(((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4898, __PRETTY_FUNCTION__))
4898 "Unexpected vector type")(((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4898, __PRETTY_FUNCTION__))
;
4899 Vec = DAG.getConstant(0, dl, VT);
4900 } else {
4901 unsigned Num32BitElts = VT.getSizeInBits() / 32;
4902 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
4903 }
4904 return DAG.getBitcast(VT, Vec);
4905}
4906
/// Extract the \p vectorWidth-bit chunk of \p Vec that contains element
/// \p IdxVal. IdxVal is rounded down to a multiple of the number of elements
/// per chunk, so it does not have to be chunk-aligned. The result keeps Vec's
/// element type and has Vec's element count divided by
/// (Vec size in bits / vectorWidth).
/// If Vec is a BUILD_VECTOR a smaller BUILD_VECTOR is returned directly;
/// otherwise an EXTRACT_SUBVECTOR node is created.
4907static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
4908 const SDLoc &dl, unsigned vectorWidth) {
4909 EVT VT = Vec.getValueType();
4910 EVT ElVT = VT.getVectorElementType();
// Factor = how many vectorWidth-bit chunks fit in the source vector.
4911 unsigned Factor = VT.getSizeInBits()/vectorWidth;
4912 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
4913 VT.getVectorNumElements()/Factor);
4914
4915 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
4916 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
4917 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4917, __PRETTY_FUNCTION__))
;
4918
4919 // This is the index of the first element of the vectorWidth-bit chunk
4920 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
4921 IdxVal &= ~(ElemsPerChunk - 1);
4922
4923 // If the input is a buildvector just emit a smaller one.
4924 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
4925 return DAG.getBuildVector(ResultVT, dl,
4926 Vec->ops().slice(IdxVal, ElemsPerChunk));
4927
4928 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
4929 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
4930}
4931
4932/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
4933/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
4934/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
4935/// instructions or a simple subregister reference. Idx is an index in the
4936/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
4937/// lowering EXTRACT_VECTOR_ELT operations easier.
/// Asserts that \p Vec is a 256-bit or 512-bit vector, then forwards to
/// extractSubVector with a chunk width of 128.
4938static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
4939 SelectionDAG &DAG, const SDLoc &dl) {
4940 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4941, __PRETTY_FUNCTION__))
4941 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4941, __PRETTY_FUNCTION__))
;
4942 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
4943}
4944
4945/// Generate a DAG to grab 256-bits from a 512-bit vector.
/// Asserts that \p Vec is a 512-bit vector, then forwards to extractSubVector
/// with a chunk width of 256; \p IdxVal is an element index inside the chunk
/// wanted and is aligned down by extractSubVector.
4946static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
4947 SelectionDAG &DAG, const SDLoc &dl) {
4948 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4948, __PRETTY_FUNCTION__))
;
4949 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
4950}
4951
/// Insert the \p vectorWidth-bit vector \p Vec into \p Result at the chunk
/// containing element \p IdxVal (IdxVal is rounded down to a multiple of the
/// number of elements per chunk). Only widths of 128 and 256 are accepted by
/// the assert. Returns \p Result untouched when Vec is undef; otherwise
/// returns an INSERT_SUBVECTOR node of Result's type.
4952static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
4953 SelectionDAG &DAG, const SDLoc &dl,
4954 unsigned vectorWidth) {
4955 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4956, __PRETTY_FUNCTION__))
4956 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4956, __PRETTY_FUNCTION__))
;
4957 // Inserting UNDEF leaves Result unchanged.
4958 if (Vec.isUndef())
4959 return Result;
4960 EVT VT = Vec.getValueType();
4961 EVT ElVT = VT.getVectorElementType();
4962 EVT ResultVT = Result.getValueType();
4963
4964 // Insert the relevant vectorWidth bits.
4965 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
4966 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4966, __PRETTY_FUNCTION__))
;
4967
4968 // This is the index of the first element of the vectorWidth-bit chunk
4969 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
4970 IdxVal &= ~(ElemsPerChunk - 1);
4971
4972 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
4973 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
4974}
4975
4976/// Generate a DAG to put 128-bits into a vector > 128 bits. This
4977/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
4978/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
4979/// simple superregister reference. Idx is an index in the 128 bits
4980/// we want. It need not be aligned to a 128-bit boundary. That makes
4981/// lowering INSERT_VECTOR_ELT operations easier.
/// Asserts that \p Vec (the value being inserted) is a 128-bit vector, then
/// forwards to insertSubVector with a chunk width of 128.
4982static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
4983 SelectionDAG &DAG, const SDLoc &dl) {
4984 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4984, __PRETTY_FUNCTION__))
;
4985 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
4986}
4987
/// Generate a DAG to put 256-bits into a wider vector. Asserts that \p Vec
/// (the value being inserted) is a 256-bit vector, then forwards to
/// insertSubVector with a chunk width of 256. \p IdxVal is an element index
/// within the destination chunk and need not be chunk-aligned.
4988static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
4989 SelectionDAG &DAG, const SDLoc &dl) {
4990 assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!")((Vec.getValueType().is256BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is256BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 4990, __PRETTY_FUNCTION__))
;
4991 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
4992}
4993
4994// Return true if the instruction zeroes the unused upper part of the
4995// destination and accepts mask.
// Only the X86ISD opcodes listed in the switch below are recognised; every
// other opcode yields false.
4996static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
4997 switch (Opcode) {
4998 default:
4999 return false;
5000 case X86ISD::TESTM:
5001 case X86ISD::TESTNM:
5002 case X86ISD::PCMPEQM:
5003 case X86ISD::PCMPGTM:
5004 case X86ISD::CMPM:
5005 case X86ISD::CMPMU:
5006 return true;
5007 }
5008}
5009
5010/// Insert i1-subvector to i1-vector.
/// \p Op supplies three operands: the destination vector (operand 0), the
/// sub-vector to insert (operand 1) and the insertion index (operand 2).
/// Returns an empty SDValue when the index is not a constant, returns \p Op
/// itself for the cases treated as already legal, and otherwise builds the
/// insertion from KSHIFTL/KSHIFTR/OR nodes or a vector shuffle.
5011static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5012 const X86Subtarget &Subtarget) {
5013
5014 SDLoc dl(Op);
5015 SDValue Vec = Op.getOperand(0);
5016 SDValue SubVec = Op.getOperand(1);
5017 SDValue Idx = Op.getOperand(2);
5018
// Only constant insertion indices are handled.
5019 if (!isa<ConstantSDNode>(Idx))
5020 return SDValue();
5021
5022 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5023 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5024 return Op;
5025
5026 MVT OpVT = Op.getSimpleValueType();
5027 MVT SubVecVT = SubVec.getSimpleValueType();
5028 unsigned NumElems = OpVT.getVectorNumElements();
5029 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5030
5031 assert(IdxVal + SubVecNumElems <= NumElems &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5033, __PRETTY_FUNCTION__))
5032 IdxVal % SubVecVT.getSizeInBits() == 0 &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5033, __PRETTY_FUNCTION__))
5033 "Unexpected index value in INSERT_SUBVECTOR")((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5033, __PRETTY_FUNCTION__))
;
5034
5035 // There are 3 possible cases:
5036 // 1. Subvector should be inserted in the lower part (IdxVal == 0)
5037 // 2. Subvector should be inserted in the upper part
5038 // (IdxVal + SubVecNumElems == NumElems)
5039 // 3. Subvector should be inserted in the middle (for example v2i1
5040 // to v16i1, index 2)
5041
5042 // If this node widens - by concatenating zeroes - the type of the result
5043 // of a node with instruction that zeroes all upper (irrelevant) bits of the
5044 // output register, mark this node as legal to enable replacing them with
5045 // the v8i1 version of the previous instruction during instruction selection.
5046 // For example, VPCMPEQDZ128rr instruction stores its v4i1 result in a k-reg,
5047 // while zeroing all the upper remaining 60 bits of the register. If the
5048 // result of such instruction is inserted into an allZeroVector, then we can
5049 // safely remove insert_vector (in instruction selection) as the cmp instr
5050 // already zeroed the rest of the register.
5051 if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 &&
5052 (isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) ||
5053 (SubVec.getOpcode() == ISD::AND &&
5054 (isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) ||
5055 isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode())))))
5056 return Op;
5057
5058 // extend to natively supported kshift
// MinVT is v8i1 with DQI and v16i1 otherwise; narrower OpVTs are widened to
// it, do the shifts there, and are narrowed again by ExtractSubVec.
5059 MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5060 MVT WideOpVT = OpVT;
5061 if (OpVT.getSizeInBits() < MinVT.getStoreSizeInBits())
5062 WideOpVT = MinVT;
5063
5064 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5065 SDValue Undef = DAG.getUNDEF(WideOpVT);
5066 SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5067 Undef, SubVec, ZeroIdx);
5068
5069 // Extract sub-vector if required.
5070 auto ExtractSubVec = [&](SDValue V) {
5071 return (WideOpVT == OpVT) ? V : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
5072 OpVT, V, ZeroIdx);
5073 };
5074
// Destination is undef: just move the sub-vector to its position.
5075 if (Vec.isUndef()) {
5076 if (IdxVal != 0) {
5077 SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
5078 WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5079 ShiftBits);
5080 }
5081 return ExtractSubVec(WideSubVec);
5082 }
5083
// Destination is all zeros: shift the sub-vector to the top of the wide
// register (dropping whatever sat above it), then shift it back down so it
// lands at IdxVal with zeros everywhere else.
5084 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5085 NumElems = WideOpVT.getVectorNumElements();
5086 unsigned ShiftLeft = NumElems - SubVecNumElems;
5087 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5088 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5089 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5090 Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5091 DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec;
5092 return ExtractSubVec(Vec);
5093 }
5094
5095 if (IdxVal == 0) {
5096 // Zero lower bits of the Vec
5097 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5098 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5099 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5100 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5101 // Merge them together, SubVec should be zero extended.
5102 WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5103 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5104 SubVec, ZeroIdx);
5105 Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
5106 return ExtractSubVec(Vec);
5107 }
5108
5109 // Simple case when we put subvector in the upper part
// NOTE(review): NumElems here is still OpVT's element count, and the
// KSHIFTL/KSHIFTR pair below shifts the widened Vec by SubVecNumElems. When
// WideOpVT is wider than OpVT that clears the top SubVecNumElems bits of the
// wide register rather than bits [IdxVal, NumElems) of Vec, so Vec's old bits
// would be OR'ed into the inserted range. A shift amount of
// (WideOpVT element count - IdxVal) looks like what is intended - confirm.
5110 if (IdxVal + SubVecNumElems == NumElems) {
5111 // Zero upper bits of the Vec
5112 WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5113 DAG.getConstant(IdxVal, dl, MVT::i8));
5114 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5115 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5116 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5117 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5118 Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
5119 return ExtractSubVec(Vec);
5120 }
5121 // Subvector should be inserted in the middle - use shuffle
// NOTE(review): Undef was created with type WideOpVT but is used here as the
// base operand of an INSERT_SUBVECTOR whose result type is OpVT; the two
// types differ whenever OpVT was widened above - confirm.
5122 WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
5123 SubVec, ZeroIdx);
// Shuffle mask: lanes inside [IdxVal, IdxVal + SubVecNumElems) use index i
// (first shuffle operand, WideSubVec); all other lanes use i + NumElems
// (second shuffle operand, Vec).
5124 SmallVector<int, 64> Mask;
5125 for (unsigned i = 0; i < NumElems; ++i)
5126 Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
5127 i : i + NumElems);
5128 return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
5129}
5130
5131/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
5132/// instructions. This is used because creating CONCAT_VECTOR nodes of
5133/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
5134/// large BUILD_VECTORS.
/// \p V1 is inserted at element 0 of an undef vector of type \p VT and \p V2
/// at element NumElems / 2.
5135static SDValue concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
5136 unsigned NumElems, SelectionDAG &DAG,
5137 const SDLoc &dl) {
5138 SDValue V = insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5139 return insert128BitVector(V, V2, NumElems / 2, DAG, dl);
5140}
5141
/// Concat two 256-bit vectors into a vector of type \p VT by inserting \p V1
/// at element 0 of an undef VT vector and \p V2 at element NumElems / 2,
/// using insert256BitVector for both halves.
5142static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
5143 unsigned NumElems, SelectionDAG &DAG,
5144 const SDLoc &dl) {
5145 SDValue V = insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5146 return insert256BitVector(V, V2, NumElems / 2, DAG, dl);
5147}
5148
5149/// Returns a vector of specified type with all bits set.
5150/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5151/// Then bitcast to their original type, ensuring they get CSE'd.
/// \p VT must be a 128-, 256- or 512-bit vector type (asserted).
5152static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5153 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5154, __PRETTY_FUNCTION__))
5154 "Expected a 128/256/512-bit vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5154, __PRETTY_FUNCTION__))
;
5155
// One all-ones 32-bit value, splatted across VT's size in bits / 32 lanes.
5156 APInt Ones = APInt::getAllOnesValue(32);
5157 unsigned NumElts = VT.getSizeInBits() / 32;
5158 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5159 return DAG.getBitcast(VT, Vec);
5160}
5161
/// Build a vector sign- or zero-extension of \p In to type \p VT.
/// \p Opc must be X86ISD::VSEXT or X86ISD::VZEXT (asserted).
///  - When both VT and In's type are 128-bit vectors, the generic
///    sign/zero "extend vector in register" node is returned.
///  - When both are wider than 128 bits, only the low part of In is kept
///    (at least 128 bits) before the \p Opc node is created.
5162static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
5163 SelectionDAG &DAG) {
5164 EVT InVT = In.getValueType();
6
Calling 'SDValue::getValueType'
5165 assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode")(((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode"
) ? static_cast<void> (0) : __assert_fail ("(X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && \"Unexpected opcode\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5165, __PRETTY_FUNCTION__))
;
5166
5167 if (VT.is128BitVector() && InVT.is128BitVector())
5168 return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
5169 : DAG.getZeroExtendVectorInReg(In, DL, VT);
5170
5171 // For 256-bit vectors, we only need the lower (128-bit) input half.
5172 // For 512-bit vectors, we only need the lower input half or quarter.
5173 if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
// Scale is the ratio of result element width to input element width; the
// input bits actually consumed are VT's size / Scale, clamped to >= 128.
5174 int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5175 In = extractSubVector(In, 0, DAG, DL,
5176 std::max(128, (int)VT.getSizeInBits() / Scale));
5177 }
5178
5179 return DAG.getNode(Opc, DL, VT, In);
5180}
5181
5182/// Returns a vector_shuffle node for an unpackl operation.
/// The mask comes from createUnpackShuffleMask with Lo = true and
/// Unary = false, and is applied to \p V1 and \p V2 with result type \p VT.
5183static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5184 SDValue V1, SDValue V2) {
5185 SmallVector<int, 8> Mask;
5186 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5187 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5188}
5189
5190/// Returns a vector_shuffle node for an unpackh operation.
/// The mask comes from createUnpackShuffleMask with Lo = false and
/// Unary = false, and is applied to \p V1 and \p V2 with result type \p VT.
5191static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5192 SDValue V1, SDValue V2) {
5193 SmallVector<int, 8> Mask;
5194 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5195 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5196}
5197
5198/// Return a vector_shuffle of the specified vector of zero or undef vector.
5199/// This produces a shuffle where the low element of V2 is swizzled into the
5200/// zero/undef vector, landing at element Idx.
5201/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
/// \p IsZero selects a zero vector (true) or an undef vector (false) as the
/// first shuffle operand; the result has V2's simple value type.
5202static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5203 bool IsZero,
5204 const X86Subtarget &Subtarget,
5205 SelectionDAG &DAG) {
5206 MVT VT = V2.getSimpleValueType();
5207 SDValue V1 = IsZero
5208 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5209 int NumElems = VT.getVectorNumElements();
// Mask index NumElems refers to element 0 of the second operand (V2); every
// other lane i selects lane i of V1.
5210 SmallVector<int, 16> MaskVec(NumElems);
5211 for (int i = 0; i != NumElems; ++i)
5212 // If this is the insertion idx, put the low elt of V2 here.
5213 MaskVec[i] = (i == Idx) ? NumElems : i;
5214 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5215}
5216
/// Strip any chain of ISD::BITCAST nodes from \p V and return the first
/// non-bitcast operand reached. Stops (and returns the current value) if the
/// value has no node.
5217static SDValue peekThroughBitcasts(SDValue V) {
5218 while (V.getNode() && V.getOpcode() == ISD::BITCAST)
5219 V = V.getOperand(0);
5220 return V;
5221}
5222
/// Like peekThroughBitcasts, but only steps through an ISD::BITCAST whose
/// operand reports hasOneUse(); stops at the first bitcast whose operand has
/// other uses.
5223static SDValue peekThroughOneUseBitcasts(SDValue V) {
5224 while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
5225 V.getOperand(0).hasOneUse())
5226 V = V.getOperand(0);
5227 return V;
5228}
5229
/// If \p Op (looking through bitcasts) is a load whose base pointer is a
/// constant-pool entry, optionally wrapped in X86ISD::Wrapper or
/// X86ISD::WrapperRIP, return the IR Constant stored in that entry.
/// Returns nullptr when Op is not a load, the pointer is not a constant-pool
/// node, or the entry is a machine constant-pool entry.
5230static const Constant *getTargetConstantFromNode(SDValue Op) {
5231 Op = peekThroughBitcasts(Op);
5232
5233 auto *Load = dyn_cast<LoadSDNode>(Op);
5234 if (!Load)
5235 return nullptr;
5236
// Look through the X86 address wrapper node, if present.
5237 SDValue Ptr = Load->getBasePtr();
5238 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5239 Ptr->getOpcode() == X86ISD::WrapperRIP)
5240 Ptr = Ptr->getOperand(0);
5241
5242 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5243 if (!CNode || CNode->isMachineConstantPoolEntry())
5244 return nullptr;
5245
5246 return dyn_cast<Constant>(CNode->getConstVal());
5247}
5248
5249// Extract raw constant bits from constant pools.
// Looks through bitcasts on Op and splits its constant value into elements of
// EltSizeInBits bits each. On success returns true with one APInt per element
// in EltBits and one bit per element in UndefElts (set = element is undef).
// Sources recognised, in order: an undef node, a scalar ConstantSDNode, a
// BUILD_VECTOR of constant nodes, a constant-pool vector load, an
// X86ISD::VBROADCAST of a constant-pool scalar, and an X86ISD::VZEXT_MOVL of
// a SCALAR_TO_VECTOR of a constant. Anything else returns false.
// AllowWholeUndefs: when false, fail if any split element is entirely undef.
// AllowPartialUndefs: when false, fail if an element is only partly undef.
// EltBits must be empty on entry and Op's size must be a multiple of
// EltSizeInBits (both asserted).
5250static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5251 APInt &UndefElts,
5252 SmallVectorImpl<APInt> &EltBits,
5253 bool AllowWholeUndefs = true,
5254 bool AllowPartialUndefs = true) {
5255 assert(EltBits.empty() && "Expected an empty EltBits vector")((EltBits.empty() && "Expected an empty EltBits vector"
) ? static_cast<void> (0) : __assert_fail ("EltBits.empty() && \"Expected an empty EltBits vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5255, __PRETTY_FUNCTION__))
;
5256
5257 Op = peekThroughBitcasts(Op);
5258
5259 EVT VT = Op.getValueType();
5260 unsigned SizeInBits = VT.getSizeInBits();
5261 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!")(((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"
) ? static_cast<void> (0) : __assert_fail ("(SizeInBits % EltSizeInBits) == 0 && \"Can't split constant!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5261, __PRETTY_FUNCTION__))
;
5262 unsigned NumElts = SizeInBits / EltSizeInBits;
5263
5264 // Bitcast a source array of element bits to the target size.
// UndefSrcElts has one bit per source element; SrcEltBits must be non-empty
// because element 0 is used to determine the source element width.
5265 auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
5266 unsigned NumSrcElts = UndefSrcElts.getBitWidth();
5267 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
5268 assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&(((NumSrcElts * SrcEltSizeInBits) == SizeInBits && "Constant bit sizes don't match"
) ? static_cast<void> (0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5269, __PRETTY_FUNCTION__))
5269 "Constant bit sizes don't match")(((NumSrcElts * SrcEltSizeInBits) == SizeInBits && "Constant bit sizes don't match"
) ? static_cast<void> (0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5269, __PRETTY_FUNCTION__))
;
5270
5271 // Don't split if we don't allow undef bits.
5272 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5273 if (UndefSrcElts.getBoolValue() && !AllowUndefs)
5274 return false;
5275
5276 // If we're already the right size, don't bother bitcasting.
// NOTE(review): this fast path relies only on the combined AllowUndefs check
// above; with AllowWholeUndefs == false but AllowPartialUndefs == true, whole
// undef elements are passed through here - confirm that is intended.
5277 if (NumSrcElts == NumElts) {
5278 UndefElts = UndefSrcElts;
5279 EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
5280 return true;
5281 }
5282
5283 // Extract all the undef/constant element data and pack into single bitsets.
5284 APInt UndefBits(SizeInBits, 0);
5285 APInt MaskBits(SizeInBits, 0);
5286
5287 for (unsigned i = 0; i != NumSrcElts; ++i) {
5288 unsigned BitOffset = i * SrcEltSizeInBits;
5289 if (UndefSrcElts[i])
5290 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5291 MaskBits.insertBits(SrcEltBits[i], BitOffset);
5292 }
5293
5294 // Split the undef/constant single bitset data into the target elements.
5295 UndefElts = APInt(NumElts, 0);
5296 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5297
5298 for (unsigned i = 0; i != NumElts; ++i) {
5299 unsigned BitOffset = i * EltSizeInBits;
5300 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5301
5302 // Only treat an element as UNDEF if all bits are UNDEF.
5303 if (UndefEltBits.isAllOnesValue()) {
5304 if (!AllowWholeUndefs)
5305 return false;
5306 UndefElts.setBit(i);
5307 continue;
5308 }
5309
5310 // If only some bits are UNDEF then treat them as zero (or bail if not
5311 // supported).
5312 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5313 return false;
5314
// NOTE(review): the value is routed through getZExtValue(), i.e. a 64-bit
// integer; presumably EltSizeInBits > 64 is not expected here - confirm, or
// assign the extracted APInt directly.
5315 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5316 EltBits[i] = Bits.getZExtValue();
5317 }
5318 return true;
5319 };
5320
5321 // Collect constant bits and insert into mask/undef bit masks.
// Returns false for a null constant or any constant kind other than
// UndefValue, ConstantInt or ConstantFP. For an UndefValue only the undef bit
// is set; Mask is left untouched.
5322 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5323 unsigned UndefBitIndex) {
5324 if (!Cst)
5325 return false;
5326 if (isa<UndefValue>(Cst)) {
5327 Undefs.setBit(UndefBitIndex);
5328 return true;
5329 }
5330 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5331 Mask = CInt->getValue();
5332 return true;
5333 }
5334 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5335 Mask = CFP->getValueAPF().bitcastToAPInt();
5336 return true;
5337 }
5338 return false;
5339 };
5340
5341 // Handle UNDEFs.
5342 if (Op.isUndef()) {
5343 APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
5344 SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
5345 return CastBitData(UndefSrcElts, SrcEltBits);
5346 }
5347
5348 // Extract scalar constant bits.
5349 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
5350 APInt UndefSrcElts = APInt::getNullValue(1);
5351 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
5352 return CastBitData(UndefSrcElts, SrcEltBits);
5353 }
5354
5355 // Extract constant bits from build vector.
5356 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5357 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5358 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5359
5360 APInt UndefSrcElts(NumSrcElts, 0);
5361 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5362 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5363 const SDValue &Src = Op.getOperand(i);
5364 if (Src.isUndef()) {
5365 UndefSrcElts.setBit(i);
5366 continue;
5367 }
// Operand constants are resized to the vector's scalar width.
5368 auto *Cst = cast<ConstantSDNode>(Src);
5369 SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5370 }
5371 return CastBitData(UndefSrcElts, SrcEltBits);
5372 }
5373
5374 // Extract constant bits from constant pool vector.
5375 if (auto *Cst = getTargetConstantFromNode(Op)) {
5376 Type *CstTy = Cst->getType();
5377 if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
5378 return false;
5379
5380 unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
5381 unsigned NumSrcElts = CstTy->getVectorNumElements();
5382
5383 APInt UndefSrcElts(NumSrcElts, 0);
5384 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5385 for (unsigned i = 0; i != NumSrcElts; ++i)
5386 if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
5387 UndefSrcElts, i))
5388 return false;
5389
5390 return CastBitData(UndefSrcElts, SrcEltBits);
5391 }
5392
5393 // Extract constant bits from a broadcasted constant pool scalar.
5394 if (Op.getOpcode() == X86ISD::VBROADCAST &&
5395 EltSizeInBits <= VT.getScalarSizeInBits()) {
5396 if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5397 unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
5398 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5399
5400 APInt UndefSrcElts(NumSrcElts, 0);
5401 SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
5402 if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
// An undef scalar makes every broadcast lane undef; then replicate the
// scalar's bits into the remaining NumSrcElts - 1 lanes.
5403 if (UndefSrcElts[0])
5404 UndefSrcElts.setBits(0, NumSrcElts);
5405 SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
5406 return CastBitData(UndefSrcElts, SrcEltBits);
5407 }
5408 }
5409 }
5410
5411 // Extract a rematerialized scalar constant insertion.
5412 if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
5413 Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
5414 isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
5415 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5416 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5417
// Element 0 holds the constant; all remaining elements are zero.
5418 APInt UndefSrcElts(NumSrcElts, 0);
5419 SmallVector<APInt, 64> SrcEltBits;
5420 auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
5421 SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
5422 SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
5423 return CastBitData(UndefSrcElts, SrcEltBits);
5424 }
5425
5426 return false;
5427}
5428
/// Decode the constant \p MaskNode into raw shuffle-mask indices, one per
/// MaskEltSizeInBits-bit element, appended to \p RawMask as zero-extended
/// 64-bit values. Returns false (leaving RawMask untouched) when the constant
/// bits cannot be extracted or contain any undef element or undef bits.
5429static bool getTargetShuffleMaskIndices(SDValue MaskNode,
5430 unsigned MaskEltSizeInBits,
5431 SmallVectorImpl<uint64_t> &RawMask) {
5432 APInt UndefElts;
5433 SmallVector<APInt, 64> EltBits;
5434
5435 // Extract the raw target constant bits.
5436 // FIXME: We currently don't support UNDEF bits or mask entries.
5437 if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
5438 EltBits, /* AllowWholeUndefs */ false,
5439 /* AllowPartialUndefs */ false))
5440 return false;
5441
5442 // Insert the extracted elements into the mask.
5443 for (APInt Elt : EltBits)
5444 RawMask.push_back(Elt.getZExtValue());
5445
5446 return true;
5447}
5448
5449/// Create a shuffle mask that matches the PACKSS/PACKUS truncation.
5450/// Note: This ignores saturation, so inputs must be checked first.
/// \p Mask must be empty on entry (asserted). For every 128-bit lane of
/// \p VT the mask receives the even-numbered element indices of that lane,
/// followed by the same indices plus an offset: 0 when \p Unary is true, or
/// the element count of VT (i.e. the second shuffle input) otherwise.
5451static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
5452 bool Unary) {
5453 assert(Mask.empty() && "Expected an empty shuffle mask vector")((Mask.empty() && "Expected an empty shuffle mask vector"
) ? static_cast<void> (0) : __assert_fail ("Mask.empty() && \"Expected an empty shuffle mask vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5453, __PRETTY_FUNCTION__))
;
5454 unsigned NumElts = VT.getVectorNumElements();
5455 unsigned NumLanes = VT.getSizeInBits() / 128;
5456 unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits();
5457 unsigned Offset = Unary ? 0 : NumElts;
5458
5459 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
// First half of the lane: even elements of the first input.
5460 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5461 Mask.push_back(Elt + (Lane * NumEltsPerLane));
// Second half of the lane: even elements of the second input (or of the
// first input again when Unary).
5462 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5463 Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset);
5464 }
5465}
5466
5467/// Calculates the shuffle mask corresponding to the target-specific opcode.
5468/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
5469/// operands in \p Ops, and returns true.
5470/// Sets \p IsUnary to true if only one source is used. Note that this will set
5471/// IsUnary for shuffles which use a single input multiple times, and in those
5472/// cases it will adjust the mask to only have indices within that single input.
5473/// It is an error to call this with non-empty Mask/Ops vectors.
5474static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
5475 SmallVectorImpl<SDValue> &Ops,
5476 SmallVectorImpl<int> &Mask, bool &IsUnary) {
5477 unsigned NumElems = VT.getVectorNumElements();
5478 SDValue ImmN;
5479
5480 assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector")((Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"
) ? static_cast<void> (0) : __assert_fail ("Mask.empty() && \"getTargetShuffleMask expects an empty Mask vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5480, __PRETTY_FUNCTION__))
;
5481 assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector")((Ops.empty() && "getTargetShuffleMask expects an empty Ops vector"
) ? static_cast<void> (0) : __assert_fail ("Ops.empty() && \"getTargetShuffleMask expects an empty Ops vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5481, __PRETTY_FUNCTION__))
;
5482
5483 IsUnary = false;
5484 bool IsFakeUnary = false;
5485 switch(N->getOpcode()) {
5486 case X86ISD::BLENDI:
5487 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5487, __PRETTY_FUNCTION__))
;
5488 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5488, __PRETTY_FUNCTION__))
;
5489 ImmN = N->getOperand(N->getNumOperands()-1);
5490 DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5491 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5492 break;
5493 case X86ISD::SHUFP:
5494 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5494, __PRETTY_FUNCTION__))
;
5495 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5495, __PRETTY_FUNCTION__))
;
5496 ImmN = N->getOperand(N->getNumOperands()-1);
5497 DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5498 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5499 break;
5500 case X86ISD::INSERTPS:
5501 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5501, __PRETTY_FUNCTION__))
;
5502 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5502, __PRETTY_FUNCTION__))
;
5503 ImmN = N->getOperand(N->getNumOperands()-1);
5504 DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5505 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5506 break;
5507 case X86ISD::EXTRQI:
5508 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5508, __PRETTY_FUNCTION__))
;
5509 if (isa<ConstantSDNode>(N->getOperand(1)) &&
5510 isa<ConstantSDNode>(N->getOperand(2))) {
5511 int BitLen = N->getConstantOperandVal(1);
5512 int BitIdx = N->getConstantOperandVal(2);
5513 DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask);
5514 IsUnary = true;
5515 }
5516 break;
5517 case X86ISD::INSERTQI:
5518 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5518, __PRETTY_FUNCTION__))
;
5519 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5519, __PRETTY_FUNCTION__))
;
5520 if (isa<ConstantSDNode>(N->getOperand(2)) &&
5521 isa<ConstantSDNode>(N->getOperand(3))) {
5522 int BitLen = N->getConstantOperandVal(2);
5523 int BitIdx = N->getConstantOperandVal(3);
5524 DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask);
5525 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5526 }
5527 break;
5528 case X86ISD::UNPCKH:
5529 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5529, __PRETTY_FUNCTION__))
;
5530 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5530, __PRETTY_FUNCTION__))
;
5531 DecodeUNPCKHMask(VT, Mask);
5532 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5533 break;
5534 case X86ISD::UNPCKL:
5535 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5535, __PRETTY_FUNCTION__))
;
5536 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5536, __PRETTY_FUNCTION__))
;
5537 DecodeUNPCKLMask(VT, Mask);
5538 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5539 break;
5540 case X86ISD::MOVHLPS:
5541 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5541, __PRETTY_FUNCTION__))
;
5542 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5542, __PRETTY_FUNCTION__))
;
5543 DecodeMOVHLPSMask(NumElems, Mask);
5544 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5545 break;
5546 case X86ISD::MOVLHPS:
5547 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5547, __PRETTY_FUNCTION__))
;
5548 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5548, __PRETTY_FUNCTION__))
;
5549 DecodeMOVLHPSMask(NumElems, Mask);
5550 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5551 break;
5552 case X86ISD::PALIGNR:
5553 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")((VT.getScalarType() == MVT::i8 && "Byte vector expected"
) ? static_cast<void> (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5553, __PRETTY_FUNCTION__))
;
5554 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5554, __PRETTY_FUNCTION__))
;
5555 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5555, __PRETTY_FUNCTION__))
;
5556 ImmN = N->getOperand(N->getNumOperands()-1);
5557 DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5558 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5559 Ops.push_back(N->getOperand(1));
5560 Ops.push_back(N->getOperand(0));
5561 break;
5562 case X86ISD::VSHLDQ:
5563 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")((VT.getScalarType() == MVT::i8 && "Byte vector expected"
) ? static_cast<void> (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5563, __PRETTY_FUNCTION__))
;
5564 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5564, __PRETTY_FUNCTION__))
;
5565 ImmN = N->getOperand(N->getNumOperands() - 1);
5566 DecodePSLLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5567 IsUnary = true;
5568 break;
5569 case X86ISD::VSRLDQ:
5570 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")((VT.getScalarType() == MVT::i8 && "Byte vector expected"
) ? static_cast<void> (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5570, __PRETTY_FUNCTION__))
;
5571 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5571, __PRETTY_FUNCTION__))
;
5572 ImmN = N->getOperand(N->getNumOperands() - 1);
5573 DecodePSRLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5574 IsUnary = true;
5575 break;
5576 case X86ISD::PSHUFD:
5577 case X86ISD::VPERMILPI:
5578 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5578, __PRETTY_FUNCTION__))
;
5579 ImmN = N->getOperand(N->getNumOperands()-1);
5580 DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5581 IsUnary = true;
5582 break;
5583 case X86ISD::PSHUFHW:
5584 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5584, __PRETTY_FUNCTION__))
;
5585 ImmN = N->getOperand(N->getNumOperands()-1);
5586 DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5587 IsUnary = true;
5588 break;
5589 case X86ISD::PSHUFLW:
5590 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5590, __PRETTY_FUNCTION__))
;
5591 ImmN = N->getOperand(N->getNumOperands()-1);
5592 DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5593 IsUnary = true;
5594 break;
5595 case X86ISD::VZEXT_MOVL:
5596 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5596, __PRETTY_FUNCTION__))
;
5597 DecodeZeroMoveLowMask(VT, Mask);
5598 IsUnary = true;
5599 break;
5600 case X86ISD::VBROADCAST: {
5601 SDValue N0 = N->getOperand(0);
5602 // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
5603 // add the pre-extracted value to the Ops vector.
5604 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5605 N0.getOperand(0).getValueType() == VT &&
5606 N0.getConstantOperandVal(1) == 0)
5607 Ops.push_back(N0.getOperand(0));
5608
5609 // We only decode broadcasts of same-sized vectors, unless the broadcast
5610 // came from an extract from the original width. If we found one, we
5611 // pushed it the Ops vector above.
5612 if (N0.getValueType() == VT || !Ops.empty()) {
5613 DecodeVectorBroadcast(VT, Mask);
5614 IsUnary = true;
5615 break;
5616 }
5617 return false;
5618 }
5619 case X86ISD::VPERMILPV: {
5620 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5620, __PRETTY_FUNCTION__))
;
5621 IsUnary = true;
5622 SDValue MaskNode = N->getOperand(1);
5623 unsigned MaskEltSize = VT.getScalarSizeInBits();
5624 SmallVector<uint64_t, 32> RawMask;
5625 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5626 DecodeVPERMILPMask(VT, RawMask, Mask);
5627 break;
5628 }
5629 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5630 DecodeVPERMILPMask(C, MaskEltSize, Mask);
5631 break;
5632 }
5633 return false;
5634 }
5635 case X86ISD::PSHUFB: {
5636 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")((VT.getScalarType() == MVT::i8 && "Byte vector expected"
) ? static_cast<void> (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5636, __PRETTY_FUNCTION__))
;
5637 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5637, __PRETTY_FUNCTION__))
;
5638 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5638, __PRETTY_FUNCTION__))
;
5639 IsUnary = true;
5640 SDValue MaskNode = N->getOperand(1);
5641 SmallVector<uint64_t, 32> RawMask;
5642 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5643 DecodePSHUFBMask(RawMask, Mask);
5644 break;
5645 }
5646 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5647 DecodePSHUFBMask(C, Mask);
5648 break;
5649 }
5650 return false;
5651 }
5652 case X86ISD::VPERMI:
5653 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5653, __PRETTY_FUNCTION__))
;
5654 ImmN = N->getOperand(N->getNumOperands()-1);
5655 DecodeVPERMMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5656 IsUnary = true;
5657 break;
5658 case X86ISD::MOVSS:
5659 case X86ISD::MOVSD:
5660 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5660, __PRETTY_FUNCTION__))
;
5661 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5661, __PRETTY_FUNCTION__))
;
5662 DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
5663 break;
5664 case X86ISD::VPERM2X128:
5665 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5665, __PRETTY_FUNCTION__))
;
5666 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5666, __PRETTY_FUNCTION__))
;
5667 ImmN = N->getOperand(N->getNumOperands()-1);
5668 DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5669 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5670 break;
5671 case X86ISD::MOVSLDUP:
5672 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5672, __PRETTY_FUNCTION__))
;
5673 DecodeMOVSLDUPMask(VT, Mask);
5674 IsUnary = true;
5675 break;
5676 case X86ISD::MOVSHDUP:
5677 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5677, __PRETTY_FUNCTION__))
;
5678 DecodeMOVSHDUPMask(VT, Mask);
5679 IsUnary = true;
5680 break;
5681 case X86ISD::MOVDDUP:
5682 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5682, __PRETTY_FUNCTION__))
;
5683 DecodeMOVDDUPMask(VT, Mask);
5684 IsUnary = true;
5685 break;
5686 case X86ISD::MOVLPD:
5687 case X86ISD::MOVLPS:
5688 // Not yet implemented
5689 return false;
5690 case X86ISD::VPERMIL2: {
5691 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5691, __PRETTY_FUNCTION__))
;
5692 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5692, __PRETTY_FUNCTION__))
;
5693 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5694 unsigned MaskEltSize = VT.getScalarSizeInBits();
5695 SDValue MaskNode = N->getOperand(2);
5696 SDValue CtrlNode = N->getOperand(3);
5697 if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
5698 unsigned CtrlImm = CtrlOp->getZExtValue();
5699 SmallVector<uint64_t, 32> RawMask;
5700 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5701 DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask);
5702 break;
5703 }
5704 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5705 DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
5706 break;
5707 }
5708 }
5709 return false;
5710 }
5711 case X86ISD::VPPERM: {
5712 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5712, __PRETTY_FUNCTION__))
;
5713 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5713, __PRETTY_FUNCTION__))
;
5714 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5715 SDValue MaskNode = N->getOperand(2);
5716 SmallVector<uint64_t, 32> RawMask;
5717 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5718 DecodeVPPERMMask(RawMask, Mask);
5719 break;
5720 }
5721 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5722 DecodeVPPERMMask(C, Mask);
5723 break;
5724 }
5725 return false;
5726 }
5727 case X86ISD::VPERMV: {
5728 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5728, __PRETTY_FUNCTION__))
;
5729 IsUnary = true;
5730 // Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
5731 Ops.push_back(N->getOperand(1));
5732 SDValue MaskNode = N->getOperand(0);
5733 SmallVector<uint64_t, 32> RawMask;
5734 unsigned MaskEltSize = VT.getScalarSizeInBits();
5735 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5736 DecodeVPERMVMask(RawMask, Mask);
5737 break;
5738 }
5739 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5740 DecodeVPERMVMask(C, MaskEltSize, Mask);
5741 break;
5742 }
5743 return false;
5744 }
5745 case X86ISD::VPERMV3: {
5746 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")((N->getOperand(0).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5746, __PRETTY_FUNCTION__))
;
5747 assert(N->getOperand(2).getValueType() == VT && "Unexpected value type")((N->getOperand(2).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(2).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5747, __PRETTY_FUNCTION__))
;
5748 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2);
5749 // Unlike most shuffle nodes, VPERMV3's mask operand is the middle one.
5750 Ops.push_back(N->getOperand(0));
5751 Ops.push_back(N->getOperand(2));
5752 SDValue MaskNode = N->getOperand(1);
5753 unsigned MaskEltSize = VT.getScalarSizeInBits();
5754 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5755 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5756 break;
5757 }
5758 return false;
5759 }
5760 case X86ISD::VPERMIV3: {
5761 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")((N->getOperand(1).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5761, __PRETTY_FUNCTION__))
;
5762 assert(N->getOperand(2).getValueType() == VT && "Unexpected value type")((N->getOperand(2).getValueType() == VT && "Unexpected value type"
) ? static_cast<void> (0) : __assert_fail ("N->getOperand(2).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5762, __PRETTY_FUNCTION__))
;
5763 IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
5764 // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
5765 Ops.push_back(N->getOperand(1));
5766 Ops.push_back(N->getOperand(2));
5767 SDValue MaskNode = N->getOperand(0);
5768 unsigned MaskEltSize = VT.getScalarSizeInBits();
5769 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5770 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5771 break;
5772 }
5773 return false;
5774 }
5775 default: llvm_unreachable("unknown target shuffle node")::llvm::llvm_unreachable_internal("unknown target shuffle node"
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5775)
;
5776 }
5777
5778 // Empty mask indicates the decode failed.
5779 if (Mask.empty())
5780 return false;
5781
5782 // Check if we're getting a shuffle mask with zero'd elements.
5783 if (!AllowSentinelZero)
5784 if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
5785 return false;
5786
5787 // If we have a fake unary shuffle, the shuffle mask is spread across two
5788 // inputs that are actually the same node. Re-map the mask to always point
5789 // into the first input.
5790 if (IsFakeUnary)
5791 for (int &M : Mask)
5792 if (M >= (int)Mask.size())
5793 M -= Mask.size();
5794
5795 // If we didn't already add operands in the opcode-specific code, default to
5796 // adding 1 or 2 operands starting at 0.
5797 if (Ops.empty()) {
5798 Ops.push_back(N->getOperand(0));
5799 if (!IsUnary || IsFakeUnary)
5800 Ops.push_back(N->getOperand(1));
5801 }
5802
5803 return true;
5804}
5805
5806/// Check a target shuffle mask's inputs to see if we can set any values to
5807/// SM_SentinelZero - this is for elements that are known to be zero
5808/// (not just zeroable) from their inputs.
5809/// Returns true if the target shuffle mask was decoded.
5810static bool setTargetShuffleZeroElements(SDValue N,
5811 SmallVectorImpl<int> &Mask,
5812 SmallVectorImpl<SDValue> &Ops) {
5813 bool IsUnary;
5814 if (!isTargetShuffle(N.getOpcode()))
5815 return false;
5816
5817 MVT VT = N.getSimpleValueType();
5818 if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
5819 return false;
5820
5821 SDValue V1 = Ops[0];
5822 SDValue V2 = IsUnary ? V1 : Ops[1];
5823
5824 V1 = peekThroughBitcasts(V1);
5825 V2 = peekThroughBitcasts(V2);
5826
5827 assert((VT.getSizeInBits() % Mask.size()) == 0 &&(((VT.getSizeInBits() % Mask.size()) == 0 && "Illegal split of shuffle value type"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % Mask.size()) == 0 && \"Illegal split of shuffle value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5828, __PRETTY_FUNCTION__))
5828 "Illegal split of shuffle value type")(((VT.getSizeInBits() % Mask.size()) == 0 && "Illegal split of shuffle value type"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % Mask.size()) == 0 && \"Illegal split of shuffle value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5828, __PRETTY_FUNCTION__))
;
5829 unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
5830
5831 // Extract known constant input data.
5832 APInt UndefSrcElts[2];
5833 SmallVector<APInt, 32> SrcEltBits[2];
5834 bool IsSrcConstant[2] = {
5835 getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
5836 SrcEltBits[0], true, false),
5837 getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
5838 SrcEltBits[1], true, false)};
5839
5840 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
5841 int M = Mask[i];
5842
5843 // Already decoded as SM_SentinelZero / SM_SentinelUndef.
5844 if (M < 0)
5845 continue;
5846
5847 // Determine shuffle input and normalize the mask.
5848 unsigned SrcIdx = M / Size;
5849 SDValue V = M < Size ? V1 : V2;
5850 M %= Size;
5851
5852 // We are referencing an UNDEF input.
5853 if (V.isUndef()) {
5854 Mask[i] = SM_SentinelUndef;
5855 continue;
5856 }
5857
5858 // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF.
5859 // TODO: We currently only set UNDEF for integer types - floats use the same
5860 // registers as vectors and many of the scalar folded loads rely on the
5861 // SCALAR_TO_VECTOR pattern.
5862 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5863 (Size % V.getValueType().getVectorNumElements()) == 0) {
5864 int Scale = Size / V.getValueType().getVectorNumElements();
5865 int Idx = M / Scale;
5866 if (Idx != 0 && !VT.isFloatingPoint())
5867 Mask[i] = SM_SentinelUndef;
5868 else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
5869 Mask[i] = SM_SentinelZero;
5870 continue;
5871 }
5872
5873 // Attempt to extract from the source's constant bits.
5874 if (IsSrcConstant[SrcIdx]) {
5875 if (UndefSrcElts[SrcIdx][M])
5876 Mask[i] = SM_SentinelUndef;
5877 else if (SrcEltBits[SrcIdx][M] == 0)
5878 Mask[i] = SM_SentinelZero;
5879 }
5880 }
5881
5882 assert(VT.getVectorNumElements() == Mask.size() &&((VT.getVectorNumElements() == Mask.size() && "Different mask size from vector size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getVectorNumElements() == Mask.size() && \"Different mask size from vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5883, __PRETTY_FUNCTION__))
5883 "Different mask size from vector size!")((VT.getVectorNumElements() == Mask.size() && "Different mask size from vector size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getVectorNumElements() == Mask.size() && \"Different mask size from vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5883, __PRETTY_FUNCTION__))
;
5884 return true;
5885}
5886
5887// Attempt to decode ops that could be represented as a shuffle mask.
5888// The decoded shuffle mask may contain a different number of elements to the
5889// destination value type.
5890static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
5891 SmallVectorImpl<SDValue> &Ops,
5892 SelectionDAG &DAG) {
5893 Mask.clear();
5894 Ops.clear();
5895
5896 MVT VT = N.getSimpleValueType();
5897 unsigned NumElts = VT.getVectorNumElements();
5898 unsigned NumSizeInBits = VT.getSizeInBits();
5899 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
5900 assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 &&(((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0
&& "Expected byte aligned value types") ? static_cast
<void> (0) : __assert_fail ("(NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 && \"Expected byte aligned value types\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5901, __PRETTY_FUNCTION__))
5901 "Expected byte aligned value types")(((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0
&& "Expected byte aligned value types") ? static_cast
<void> (0) : __assert_fail ("(NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 && \"Expected byte aligned value types\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5901, __PRETTY_FUNCTION__))
;
5902
5903 unsigned Opcode = N.getOpcode();
5904 switch (Opcode) {
5905 case ISD::AND:
5906 case X86ISD::ANDNP: {
5907 // Attempt to decode as a per-byte mask.
5908 APInt UndefElts;
5909 SmallVector<APInt, 32> EltBits;
5910 SDValue N0 = N.getOperand(0);
5911 SDValue N1 = N.getOperand(1);
5912 bool IsAndN = (X86ISD::ANDNP == Opcode);
5913 uint64_t ZeroMask = IsAndN ? 255 : 0;
5914 if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
5915 return false;
5916 for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
5917 if (UndefElts[i]) {
5918 Mask.push_back(SM_SentinelUndef);
5919 continue;
5920 }
5921 uint64_t ByteBits = EltBits[i].getZExtValue();
5922 if (ByteBits != 0 && ByteBits != 255)
5923 return false;
5924 Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
5925 }
5926 Ops.push_back(IsAndN ? N1 : N0);
5927 return true;
5928 }
5929 case ISD::SCALAR_TO_VECTOR: {
5930 // Match against a scalar_to_vector of an extract from a vector,
5931 // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
5932 SDValue N0 = N.getOperand(0);
5933 SDValue SrcExtract;
5934
5935 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5936 N0.getOperand(0).getValueType() == VT) ||
5937 (N0.getOpcode() == X86ISD::PEXTRW &&
5938 N0.getOperand(0).getValueType() == MVT::v8i16) ||
5939 (N0.getOpcode() == X86ISD::PEXTRB &&
5940 N0.getOperand(0).getValueType() == MVT::v16i8)) {
5941 SrcExtract = N0;
5942 }
5943
5944 if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
5945 return false;
5946
5947 SDValue SrcVec = SrcExtract.getOperand(0);
5948 EVT SrcVT = SrcVec.getValueType();
5949 unsigned NumSrcElts = SrcVT.getVectorNumElements();
5950 unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
5951
5952 unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
5953 if (NumSrcElts <= SrcIdx)
5954 return false;
5955
5956 Ops.push_back(SrcVec);
5957 Mask.push_back(SrcIdx);
5958 Mask.append(NumZeros, SM_SentinelZero);
5959 Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
5960 return true;
5961 }
5962 case X86ISD::PINSRB:
5963 case X86ISD::PINSRW: {
5964 SDValue InVec = N.getOperand(0);
5965 SDValue InScl = N.getOperand(1);
5966 uint64_t InIdx = N.getConstantOperandVal(2);
5967 assert(InIdx < NumElts && "Illegal insertion index")((InIdx < NumElts && "Illegal insertion index") ? static_cast
<void> (0) : __assert_fail ("InIdx < NumElts && \"Illegal insertion index\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5967, __PRETTY_FUNCTION__))
;
5968
5969 // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern.
5970 if (X86::isZeroNode(InScl)) {
5971 Ops.push_back(InVec);
5972 for (unsigned i = 0; i != NumElts; ++i)
5973 Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
5974 return true;
5975 }
5976
5977 // Attempt to recognise a PINSR*(PEXTR*) shuffle pattern.
5978 // TODO: Expand this to support INSERT_VECTOR_ELT/etc.
5979 unsigned ExOp =
5980 (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW);
5981 if (InScl.getOpcode() != ExOp)
5982 return false;
5983
5984 SDValue ExVec = InScl.getOperand(0);
5985 uint64_t ExIdx = InScl.getConstantOperandVal(1);
5986 assert(ExIdx < NumElts && "Illegal extraction index")((ExIdx < NumElts && "Illegal extraction index") ?
static_cast<void> (0) : __assert_fail ("ExIdx < NumElts && \"Illegal extraction index\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5986, __PRETTY_FUNCTION__))
;
5987 Ops.push_back(InVec);
5988 Ops.push_back(ExVec);
5989 for (unsigned i = 0; i != NumElts; ++i)
5990 Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
5991 return true;
5992 }
5993 case X86ISD::PACKSS:
5994 case X86ISD::PACKUS: {
5995 SDValue N0 = N.getOperand(0);
5996 SDValue N1 = N.getOperand(1);
5997 assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&((N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
"Unexpected input value type") ? static_cast<void> (0)
: __assert_fail ("N0.getValueType().getVectorNumElements() == (NumElts / 2) && N1.getValueType().getVectorNumElements() == (NumElts / 2) && \"Unexpected input value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5999, __PRETTY_FUNCTION__))
5998 N1.getValueType().getVectorNumElements() == (NumElts / 2) &&((N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
"Unexpected input value type") ? static_cast<void> (0)
: __assert_fail ("N0.getValueType().getVectorNumElements() == (NumElts / 2) && N1.getValueType().getVectorNumElements() == (NumElts / 2) && \"Unexpected input value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5999, __PRETTY_FUNCTION__))
5999 "Unexpected input value type")((N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
"Unexpected input value type") ? static_cast<void> (0)
: __assert_fail ("N0.getValueType().getVectorNumElements() == (NumElts / 2) && N1.getValueType().getVectorNumElements() == (NumElts / 2) && \"Unexpected input value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 5999, __PRETTY_FUNCTION__))
;
6000
6001 // If we know input saturation won't happen we can treat this
6002 // as a truncation shuffle.
6003 if (Opcode == X86ISD::PACKSS) {
6004 if ((!N0.isUndef() && DAG.ComputeNumSignBits(N0) <= NumBitsPerElt) ||
6005 (!N1.isUndef() && DAG.ComputeNumSignBits(N1) <= NumBitsPerElt))
6006 return false;
6007 } else {
6008 APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
6009 if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask)) ||
6010 (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask)))
6011 return false;
6012 }
6013
6014 bool IsUnary = (N0 == N1);
6015
6016 Ops.push_back(N0);
6017 if (!IsUnary)
6018 Ops.push_back(N1);
6019
6020 createPackShuffleMask(VT, Mask, IsUnary);
6021 return true;
6022 }
6023 case X86ISD::VSHLI:
6024 case X86ISD::VSRLI: {
6025 uint64_t ShiftVal = N.getConstantOperandVal(1);
6026 // Out of range bit shifts are guaranteed to be zero.
6027 if (NumBitsPerElt <= ShiftVal) {
6028 Mask.append(NumElts, SM_SentinelZero);
6029 return true;
6030 }
6031
6032 // We can only decode 'whole byte' bit shifts as shuffles.
6033 if ((ShiftVal % 8) != 0)
6034 break;
6035
6036 uint64_t ByteShift = ShiftVal / 8;
6037 unsigned NumBytes = NumSizeInBits / 8;
6038 unsigned NumBytesPerElt = NumBitsPerElt / 8;
6039 Ops.push_back(N.getOperand(0));
6040
6041 // Clear mask to all zeros and insert the shifted byte indices.
6042 Mask.append(NumBytes, SM_SentinelZero);
6043
6044 if (X86ISD::VSHLI == Opcode) {
6045 for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
6046 for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
6047 Mask[i + j] = i + j - ByteShift;
6048 } else {
6049 for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
6050 for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
6051 Mask[i + j - ByteShift] = i + j;
6052 }
6053 return true;
6054 }
6055 case ISD::ZERO_EXTEND_VECTOR_INREG:
6056 case X86ISD::VZEXT: {
6057 // TODO - add support for VPMOVZX with smaller input vector types.
6058 SDValue Src = N.getOperand(0);
6059 MVT SrcVT = Src.getSimpleValueType();
6060 if (NumSizeInBits != SrcVT.getSizeInBits())
6061 break;
6062 DecodeZeroExtendMask(SrcVT.getScalarType(), VT, Mask);
6063 Ops.push_back(Src);
6064 return true;
6065 }
6066 }
6067
6068 return false;
6069}
6070
6071/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly.
6072static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
6073 SmallVectorImpl<int> &Mask) {
6074 int MaskWidth = Mask.size();
6075 SmallVector<SDValue, 16> UsedInputs;
6076 for (int i = 0, e = Inputs.size(); i < e; ++i) {
6077 int lo = UsedInputs.size() * MaskWidth;
6078 int hi = lo + MaskWidth;
6079
6080 // Strip UNDEF input usage.
6081 if (Inputs[i].isUndef())
6082 for (int &M : Mask)
6083 if ((lo <= M) && (M < hi))
6084 M = SM_SentinelUndef;
6085
6086 // Check for unused inputs.
6087 if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
6088 UsedInputs.push_back(Inputs[i]);
6089 continue;
6090 }
6091 for (int &M : Mask)
6092 if (lo <= M)
6093 M -= MaskWidth;
6094 }
6095 Inputs = UsedInputs;
6096}
6097
6098/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
6099/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
6100/// remaining input indices in case we now have a unary shuffle and adjust the
6101/// inputs accordingly.
6102/// Returns true if the target shuffle mask was decoded.
6103static bool resolveTargetShuffleInputs(SDValue Op,
6104 SmallVectorImpl<SDValue> &Inputs,
6105 SmallVectorImpl<int> &Mask,
6106 SelectionDAG &DAG) {
6107 if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
6108 if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
6109 return false;
6110
6111 resolveTargetShuffleInputsAndMask(Inputs, Mask);
6112 return true;
6113}
6114
6115/// Returns the scalar element that will make up the ith
6116/// element of the result of the vector shuffle.
6117static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
6118 unsigned Depth) {
6119 if (Depth == 6)
6120 return SDValue(); // Limit search depth.
6121
6122 SDValue V = SDValue(N, 0);
6123 EVT VT = V.getValueType();
6124 unsigned Opcode = V.getOpcode();
6125
6126 // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
6127 if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
6128 int Elt = SV->getMaskElt(Index);
6129
6130 if (Elt < 0)
6131 return DAG.getUNDEF(VT.getVectorElementType());
6132
6133 unsigned NumElems = VT.getVectorNumElements();
6134 SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
6135 : SV->getOperand(1);
6136 return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
6137 }
6138
6139 // Recurse into target specific vector shuffles to find scalars.
6140 if (isTargetShuffle(Opcode)) {
6141 MVT ShufVT = V.getSimpleValueType();
6142 MVT ShufSVT = ShufVT.getVectorElementType();
6143 int NumElems = (int)ShufVT.getVectorNumElements();
6144 SmallVector<int, 16> ShuffleMask;
6145 SmallVector<SDValue, 16> ShuffleOps;
6146 bool IsUnary;
6147
6148 if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary))
6149 return SDValue();
6150
6151 int Elt = ShuffleMask[Index];
6152 if (Elt == SM_SentinelZero)
6153 return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(N), ShufSVT)
6154 : DAG.getConstantFP(+0.0, SDLoc(N), ShufSVT);
6155 if (Elt == SM_SentinelUndef)
6156 return DAG.getUNDEF(ShufSVT);
6157
6158 assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range")((0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range"
) ? static_cast<void> (0) : __assert_fail ("0 <= Elt && Elt < (2*NumElems) && \"Shuffle index out of range\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6158, __PRETTY_FUNCTION__))
;
6159 SDValue NewV = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
6160 return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
6161 Depth+1);
6162 }
6163
6164 // Actual nodes that may contain scalar elements
6165 if (Opcode == ISD::BITCAST) {
6166 V = V.getOperand(0);
6167 EVT SrcVT = V.getValueType();
6168 unsigned NumElems = VT.getVectorNumElements();
6169
6170 if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
6171 return SDValue();
6172 }
6173
6174 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
6175 return (Index == 0) ? V.getOperand(0)
6176 : DAG.getUNDEF(VT.getVectorElementType());
6177
6178 if (V.getOpcode() == ISD::BUILD_VECTOR)
6179 return V.getOperand(Index);
6180
6181 return SDValue();
6182}
6183
6184// Use PINSRB/PINSRW/PINSRD to create a build vector.
6185static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
6186 unsigned NumNonZero, unsigned NumZero,
6187 SelectionDAG &DAG,
6188 const X86Subtarget &Subtarget) {
6189 MVT VT = Op.getSimpleValueType();
6190 unsigned NumElts = VT.getVectorNumElements();
6191 assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) ||((((VT == MVT::v8i16 && Subtarget.hasSSE2()) || ((VT ==
MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41
())) && "Illegal vector insertion") ? static_cast<
void> (0) : __assert_fail ("((VT == MVT::v8i16 && Subtarget.hasSSE2()) || ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && \"Illegal vector insertion\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6193, __PRETTY_FUNCTION__))
6192 ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) &&((((VT == MVT::v8i16 && Subtarget.hasSSE2()) || ((VT ==
MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41
())) && "Illegal vector insertion") ? static_cast<
void> (0) : __assert_fail ("((VT == MVT::v8i16 && Subtarget.hasSSE2()) || ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && \"Illegal vector insertion\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6193, __PRETTY_FUNCTION__))
6193 "Illegal vector insertion")((((VT == MVT::v8i16 && Subtarget.hasSSE2()) || ((VT ==
MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41
())) && "Illegal vector insertion") ? static_cast<
void> (0) : __assert_fail ("((VT == MVT::v8i16 && Subtarget.hasSSE2()) || ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && \"Illegal vector insertion\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6193, __PRETTY_FUNCTION__))
;
6194
6195 SDLoc dl(Op);
6196 SDValue V;
6197 bool First = true;
6198
6199 for (unsigned i = 0; i < NumElts; ++i) {
6200 bool IsNonZero = (NonZeros & (1 << i)) != 0;
6201 if (!IsNonZero)
6202 continue;
6203
6204 // If the build vector contains zeros or our first insertion is not the
6205 // first index then insert into zero vector to break any register
6206 // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
6207 if (First) {
6208 First = false;
6209 if (NumZero || 0 != i)
6210 V = getZeroVector(VT, Subtarget, DAG, dl);
6211 else {
6212 assert(0 == i && "Expected insertion into zero-index")((0 == i && "Expected insertion into zero-index") ? static_cast
<void> (0) : __assert_fail ("0 == i && \"Expected insertion into zero-index\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6212, __PRETTY_FUNCTION__))
;
6213 V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
6214 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6215 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6216 V = DAG.getBitcast(VT, V);
6217 continue;
6218 }
6219 }
6220 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V, Op.getOperand(i),
6221 DAG.getIntPtrConstant(i, dl));
6222 }
6223
6224 return V;
6225}
6226
6227/// Custom lower build_vector of v16i8.
6228static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
6229 unsigned NumNonZero, unsigned NumZero,
6230 SelectionDAG &DAG,
6231 const X86Subtarget &Subtarget) {
6232 if (NumNonZero > 8 && !Subtarget.hasSSE41())
6233 return SDValue();
6234
6235 // SSE4.1 - use PINSRB to insert each byte directly.
6236 if (Subtarget.hasSSE41())
6237 return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
6238 Subtarget);
6239
6240 SDLoc dl(Op);
6241 SDValue V;
6242 bool First = true;
6243
6244 // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
6245 for (unsigned i = 0; i < 16; ++i) {
6246 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
6247 if (ThisIsNonZero && First) {
6248 if (NumZero)
6249 V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
6250 else
6251 V = DAG.getUNDEF(MVT::v8i16);
6252 First = false;
6253 }
6254
6255 if ((i & 1) != 0) {
6256 // FIXME: Investigate extending to i32 instead of just i16.
6257 // FIXME: Investigate combining the first 4 bytes as a i32 instead.
6258 SDValue ThisElt, LastElt;
6259 bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
6260 if (LastIsNonZero) {
6261 LastElt =
6262 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
6263 }
6264 if (ThisIsNonZero) {
6265 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
6266 ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
6267 DAG.getConstant(8, dl, MVT::i8));
6268 if (LastIsNonZero)
6269 ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
6270 } else
6271 ThisElt = LastElt;
6272
6273 if (ThisElt) {
6274 if (1 == i) {
6275 V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
6276 : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
6277 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6278 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6279 V = DAG.getBitcast(MVT::v8i16, V);
6280 } else {
6281 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
6282 DAG.getIntPtrConstant(i / 2, dl));
6283 }
6284 }
6285 }
6286 }
6287
6288 return DAG.getBitcast(MVT::v16i8, V);
6289}
6290
6291/// Custom lower build_vector of v8i16.
6292static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
6293 unsigned NumNonZero, unsigned NumZero,
6294 SelectionDAG &DAG,
6295 const X86Subtarget &Subtarget) {
6296 if (NumNonZero > 4 && !Subtarget.hasSSE41())
6297 return SDValue();
6298
6299 // Use PINSRW to insert each byte directly.
6300 return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
6301 Subtarget);
6302}
6303
6304/// Custom lower build_vector of v4i32 or v4f32.
6305static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
6306 const X86Subtarget &Subtarget) {
6307 // Find all zeroable elements.
6308 std::bitset<4> Zeroable;
6309 for (int i=0; i < 4; ++i) {
6310 SDValue Elt = Op->getOperand(i);
6311 Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
6312 }
6313 assert(Zeroable.size() - Zeroable.count() > 1 &&((Zeroable.size() - Zeroable.count() > 1 && "We expect at least two non-zero elements!"
) ? static_cast<void> (0) : __assert_fail ("Zeroable.size() - Zeroable.count() > 1 && \"We expect at least two non-zero elements!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6314, __PRETTY_FUNCTION__))
6314 "We expect at least two non-zero elements!")((Zeroable.size() - Zeroable.count() > 1 && "We expect at least two non-zero elements!"
) ? static_cast<void> (0) : __assert_fail ("Zeroable.size() - Zeroable.count() > 1 && \"We expect at least two non-zero elements!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6314, __PRETTY_FUNCTION__))
;
6315
6316 // We only know how to deal with build_vector nodes where elements are either
6317 // zeroable or extract_vector_elt with constant index.
6318 SDValue FirstNonZero;
6319 unsigned FirstNonZeroIdx;
6320 for (unsigned i=0; i < 4; ++i) {
6321 if (Zeroable[i])
6322 continue;
6323 SDValue Elt = Op->getOperand(i);
6324 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6325 !isa<ConstantSDNode>(Elt.getOperand(1)))
6326 return SDValue();
6327 // Make sure that this node is extracting from a 128-bit vector.
6328 MVT VT = Elt.getOperand(0).getSimpleValueType();
6329 if (!VT.is128BitVector())
6330 return SDValue();
6331 if (!FirstNonZero.getNode()) {
6332 FirstNonZero = Elt;
6333 FirstNonZeroIdx = i;
6334 }
6335 }
6336
6337 assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!")((FirstNonZero.getNode() && "Unexpected build vector of all zeros!"
) ? static_cast<void> (0) : __assert_fail ("FirstNonZero.getNode() && \"Unexpected build vector of all zeros!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6337, __PRETTY_FUNCTION__))
;
6338 SDValue V1 = FirstNonZero.getOperand(0);
6339 MVT VT = V1.getSimpleValueType();
6340
6341 // See if this build_vector can be lowered as a blend with zero.
6342 SDValue Elt;
6343 unsigned EltMaskIdx, EltIdx;
6344 int Mask[4];
6345 for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
6346 if (Zeroable[EltIdx]) {
6347 // The zero vector will be on the right hand side.
6348 Mask[EltIdx] = EltIdx+4;
6349 continue;
6350 }
6351
6352 Elt = Op->getOperand(EltIdx);
6353 // By construction, Elt is a EXTRACT_VECTOR_ELT with constant index.
6354 EltMaskIdx = Elt.getConstantOperandVal(1);
6355 if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
6356 break;
6357 Mask[EltIdx] = EltIdx;
6358 }
6359
6360 if (EltIdx == 4) {
6361 // Let the shuffle legalizer deal with blend operations.
6362 SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
6363 if (V1.getSimpleValueType() != VT)
6364 V1 = DAG.getBitcast(VT, V1);
6365 return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask);
6366 }
6367
6368 // See if we can lower this build_vector to a INSERTPS.
6369 if (!Subtarget.hasSSE41())
6370 return SDValue();
6371
6372 SDValue V2 = Elt.getOperand(0);
6373 if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
6374 V1 = SDValue();
6375
6376 bool CanFold = true;
6377 for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
6378 if (Zeroable[i])
6379 continue;
6380
6381 SDValue Current = Op->getOperand(i);
6382 SDValue SrcVector = Current->getOperand(0);
6383 if (!V1.getNode())
6384 V1 = SrcVector;
6385 CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i);
6386 }
6387
6388 if (!CanFold)
6389 return SDValue();
6390
6391 assert(V1.getNode() && "Expected at least two non-zero elements!")((V1.getNode() && "Expected at least two non-zero elements!"
) ? static_cast<void> (0) : __assert_fail ("V1.getNode() && \"Expected at least two non-zero elements!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6391, __PRETTY_FUNCTION__))
;
6392 if (V1.getSimpleValueType() != MVT::v4f32)
6393 V1 = DAG.getBitcast(MVT::v4f32, V1);
6394 if (V2.getSimpleValueType() != MVT::v4f32)
6395 V2 = DAG.getBitcast(MVT::v4f32, V2);
6396
6397 // Ok, we can emit an INSERTPS instruction.
6398 unsigned ZMask = Zeroable.to_ulong();
6399
6400 unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
6401 assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!")(((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"
) ? static_cast<void> (0) : __assert_fail ("(InsertPSMask & ~0xFFu) == 0 && \"Invalid mask!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6401, __PRETTY_FUNCTION__))
;
6402 SDLoc DL(Op);
6403 SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
6404 DAG.getIntPtrConstant(InsertPSMask, DL));
6405 return DAG.getBitcast(VT, Result);
6406}
6407
6408/// Return a vector logical shift node.
6409static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
6410 SelectionDAG &DAG, const TargetLowering &TLI,
6411 const SDLoc &dl) {
6412 assert(VT.is128BitVector() && "Unknown type for VShift")((VT.is128BitVector() && "Unknown type for VShift") ?
static_cast<void> (0) : __assert_fail ("VT.is128BitVector() && \"Unknown type for VShift\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6412, __PRETTY_FUNCTION__))
;
6413 MVT ShVT = MVT::v16i8;
6414 unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
6415 SrcOp = DAG.getBitcast(ShVT, SrcOp);
6416 MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
6417 assert(NumBits % 8 == 0 && "Only support byte sized shifts")((NumBits % 8 == 0 && "Only support byte sized shifts"
) ? static_cast<void> (0) : __assert_fail ("NumBits % 8 == 0 && \"Only support byte sized shifts\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6417, __PRETTY_FUNCTION__))
;
6418 SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
6419 return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
6420}
6421
6422static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
6423 SelectionDAG &DAG) {
6424
6425 // Check if the scalar load can be widened into a vector load. And if
6426 // the address is "base + cst" see if the cst can be "absorbed" into
6427 // the shuffle mask.
6428 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
6429 SDValue Ptr = LD->getBasePtr();
6430 if (!ISD::isNormalLoad(LD) || LD->isVolatile())
6431 return SDValue();
6432 EVT PVT = LD->getValueType(0);
6433 if (PVT != MVT::i32 && PVT != MVT::f32)
6434 return SDValue();
6435
6436 int FI = -1;
6437 int64_t Offset = 0;
6438 if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
6439 FI = FINode->getIndex();
6440 Offset = 0;
6441 } else if (DAG.isBaseWithConstantOffset(Ptr) &&
6442 isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
6443 FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
6444 Offset = Ptr.getConstantOperandVal(1);
6445 Ptr = Ptr.getOperand(0);
6446 } else {
6447 return SDValue();
6448 }
6449
6450 // FIXME: 256-bit vector instructions don't require a strict alignment,
6451 // improve this code to support it better.
6452 unsigned RequiredAlign = VT.getSizeInBits()/8;
6453 SDValue Chain = LD->getChain();
6454 // Make sure the stack object alignment is at least 16 or 32.
6455 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6456 if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
6457 if (MFI.isFixedObjectIndex(FI)) {
6458 // Can't change the alignment. FIXME: It's possible to compute
6459 // the exact stack offset and reference FI + adjust offset instead.
6460 // If someone *really* cares about this. That's the way to implement it.
6461 return SDValue();
6462 } else {
6463 MFI.setObjectAlignment(FI, RequiredAlign);
6464 }
6465 }
6466
6467 // (Offset % 16 or 32) must be multiple of 4. Then address is then
6468 // Ptr + (Offset & ~15).
6469 if (Offset < 0)
6470 return SDValue();
6471 if ((Offset % RequiredAlign) & 3)
6472 return SDValue();
6473 int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
6474 if (StartOffset) {
6475 SDLoc DL(Ptr);
6476 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6477 DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
6478 }
6479
6480 int EltNo = (Offset - StartOffset) >> 2;
6481 unsigned NumElems = VT.getVectorNumElements();
6482
6483 EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
6484 SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
6485 LD->getPointerInfo().getWithOffset(StartOffset));
6486
6487 SmallVector<int, 8> Mask(NumElems, EltNo);
6488
6489 return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask);
6490 }
6491
6492 return SDValue();
6493}
6494
6495/// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
6496/// elements can be replaced by a single large load which has the same value as
6497/// a build_vector or insert_subvector whose loaded operands are 'Elts'.
6498///
6499/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
6500static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
6501 const SDLoc &DL, SelectionDAG &DAG,
6502 const X86Subtarget &Subtarget,
6503 bool isAfterLegalize) {
6504 unsigned NumElems = Elts.size();
6505
6506 int LastLoadedElt = -1;
6507 SmallBitVector LoadMask(NumElems, false);
6508 SmallBitVector ZeroMask(NumElems, false);
6509 SmallBitVector UndefMask(NumElems, false);
6510
6511 // For each element in the initializer, see if we've found a load, zero or an
6512 // undef.
6513 for (unsigned i = 0; i < NumElems; ++i) {
6514 SDValue Elt = peekThroughBitcasts(Elts[i]);
6515 if (!Elt.getNode())
6516 return SDValue();
6517
6518 if (Elt.isUndef())
6519 UndefMask[i] = true;
6520 else if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode()))
6521 ZeroMask[i] = true;
6522 else if (ISD::isNON_EXTLoad(Elt.getNode())) {
6523 LoadMask[i] = true;
6524 LastLoadedElt = i;
6525 // Each loaded element must be the correct fractional portion of the
6526 // requested vector load.
6527 if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
6528 return SDValue();
6529 } else
6530 return SDValue();
6531 }
6532 assert((ZeroMask | UndefMask | LoadMask).count() == NumElems &&(((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
"Incomplete element masks") ? static_cast<void> (0) : __assert_fail
("(ZeroMask | UndefMask | LoadMask).count() == NumElems && \"Incomplete element masks\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6533, __PRETTY_FUNCTION__))
6533 "Incomplete element masks")(((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
"Incomplete element masks") ? static_cast<void> (0) : __assert_fail
("(ZeroMask | UndefMask | LoadMask).count() == NumElems && \"Incomplete element masks\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6533, __PRETTY_FUNCTION__))
;
6534
6535 // Handle Special Cases - all undef or undef/zero.
6536 if (UndefMask.count() == NumElems)
6537 return DAG.getUNDEF(VT);
6538
6539 // FIXME: Should we return this as a BUILD_VECTOR instead?
6540 if ((ZeroMask | UndefMask).count() == NumElems)
6541 return VT.isInteger() ? DAG.getConstant(0, DL, VT)
6542 : DAG.getConstantFP(0.0, DL, VT);
6543
6544 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6545 int FirstLoadedElt = LoadMask.find_first();
6546 SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
6547 LoadSDNode *LDBase = cast<LoadSDNode>(EltBase);
6548 EVT LDBaseVT = EltBase.getValueType();
6549
6550 // Consecutive loads can contain UNDEFS but not ZERO elements.
6551 // Consecutive loads with UNDEF and ZERO elements require
6552 // an additional shuffle stage to clear the ZERO elements.
6553 bool IsConsecutiveLoad = true;
6554 bool IsConsecutiveLoadWithZeros = true;
6555 for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
6556 if (LoadMask[i]) {
6557 SDValue Elt = peekThroughBitcasts(Elts[i]);
6558 LoadSDNode *LD = cast<LoadSDNode>(Elt);
6559 if (!DAG.areNonVolatileConsecutiveLoads(
6560 LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
6561 i - FirstLoadedElt)) {
6562 IsConsecutiveLoad = false;
6563 IsConsecutiveLoadWithZeros = false;
6564 break;
6565 }
6566 } else if (ZeroMask[i]) {
6567 IsConsecutiveLoad = false;
6568 }
6569 }
6570
6571 SmallVector<LoadSDNode *, 8> Loads;
6572 for (int i = FirstLoadedElt; i <= LastLoadedElt; ++i)
6573 if (LoadMask[i])
6574 Loads.push_back(cast<LoadSDNode>(peekThroughBitcasts(Elts[i])));
6575
6576 auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
6577 auto MMOFlags = LDBase->getMemOperand()->getFlags();
6578 assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&((!(MMOFlags & MachineMemOperand::MOVolatile) && "Cannot merge volatile loads."
) ? static_cast<void> (0) : __assert_fail ("!(MMOFlags & MachineMemOperand::MOVolatile) && \"Cannot merge volatile loads.\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6579, __PRETTY_FUNCTION__))
6579 "Cannot merge volatile loads.")((!(MMOFlags & MachineMemOperand::MOVolatile) && "Cannot merge volatile loads."
) ? static_cast<void> (0) : __assert_fail ("!(MMOFlags & MachineMemOperand::MOVolatile) && \"Cannot merge volatile loads.\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6579, __PRETTY_FUNCTION__))
;
6580 SDValue NewLd =
6581 DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
6582 LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
6583 for (auto *LD : Loads)
6584 DAG.makeEquivalentMemoryOrdering(LD, NewLd);
6585 return NewLd;
6586 };
6587
6588 // LOAD - all consecutive load/undefs (must start/end with a load).
6589 // If we have found an entire vector of loads and undefs, then return a large
6590 // load of the entire vector width starting at the base pointer.
6591 // If the vector contains zeros, then attempt to shuffle those elements.
6592 if (FirstLoadedElt == 0 && LastLoadedElt == (int)(NumElems - 1) &&
6593 (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
6594 assert(LDBase && "Did not find base load for merging consecutive loads")((LDBase && "Did not find base load for merging consecutive loads"
) ? static_cast<void> (0) : __assert_fail ("LDBase && \"Did not find base load for merging consecutive loads\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6594, __PRETTY_FUNCTION__))
;
6595 EVT EltVT = LDBase->getValueType(0);
6596 // Ensure that the input vector size for the merged loads matches the
6597 // cumulative size of the input elements.
6598 if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
6599 return SDValue();
6600
6601 if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
6602 return SDValue();
6603
6604 // Don't create 256-bit non-temporal aligned loads without AVX2 as these
6605 // will lower to regular temporal loads and use the cache.
6606 if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 &&
6607 VT.is256BitVector() && !Subtarget.hasInt256())
6608 return SDValue();
6609
6610 if (IsConsecutiveLoad)
6611 return CreateLoad(VT, LDBase);
6612
6613 // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
6614 // vector and a zero vector to clear out the zero elements.
6615 if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
6616 SmallVector<int, 4> ClearMask(NumElems, -1);
6617 for (unsigned i = 0; i < NumElems; ++i) {
6618 if (ZeroMask[i])
6619 ClearMask[i] = i + NumElems;
6620 else if (LoadMask[i])
6621 ClearMask[i] = i;
6622 }
6623 SDValue V = CreateLoad(VT, LDBase);
6624 SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
6625 : DAG.getConstantFP(0.0, DL, VT);
6626 return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
6627 }
6628 }
6629
6630 int LoadSize =
6631 (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
6632
6633 // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
6634 if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
6635 (LoadSize == 32 || LoadSize == 64) &&
6636 ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
6637 MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
6638 : MVT::getIntegerVT(LoadSize);
6639 MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
6640 if (TLI.isTypeLegal(VecVT)) {
6641 SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
6642 SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
6643 SDValue ResNode =
6644 DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT,
6645 LDBase->getPointerInfo(),
6646 LDBase->getAlignment(),
6647 false/*isVolatile*/, true/*ReadMem*/,
6648 false/*WriteMem*/);
6649 for (auto *LD : Loads)
6650 DAG.makeEquivalentMemoryOrdering(LD, ResNode);
6651 return DAG.getBitcast(VT, ResNode);
6652 }
6653 }
6654
6655 return SDValue();
6656}
6657
6658static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
6659 unsigned SplatBitSize, LLVMContext &C) {
6660 unsigned ScalarSize = VT.getScalarSizeInBits();
6661 unsigned NumElm = SplatBitSize / ScalarSize;
6662
6663 SmallVector<Constant *, 32> ConstantVec;
6664 for (unsigned i = 0; i < NumElm; i++) {
6665 APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
6666 Constant *Const;
6667 if (VT.isFloatingPoint()) {
6668 if (ScalarSize == 32) {
6669 Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
6670 } else {
6671 assert(ScalarSize == 64 && "Unsupported floating point scalar size")((ScalarSize == 64 && "Unsupported floating point scalar size"
) ? static_cast<void> (0) : __assert_fail ("ScalarSize == 64 && \"Unsupported floating point scalar size\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6671, __PRETTY_FUNCTION__))
;
6672 Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
6673 }
6674 } else
6675 Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
6676 ConstantVec.push_back(Const);
6677 }
6678 return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
6679}
6680
6681static bool isUseOfShuffle(SDNode *N) {
6682 for (auto *U : N->uses()) {
6683 if (isTargetShuffle(U->getOpcode()))
6684 return true;
6685 if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
6686 return isUseOfShuffle(U);
6687 }
6688 return false;
6689}
6690
6691// Check if the current node of build vector is a zero extended vector.
6692// // If so, return the value extended.
6693// // For example: (0,0,0,a,0,0,0,a,0,0,0,a,0,0,0,a) returns a.
6694// // NumElt - return the number of zero extended identical values.
6695// // EltType - return the type of the value include the zero extend.
6696static SDValue isSplatZeroExtended(const BuildVectorSDNode *Op,
6697 unsigned &NumElt, MVT &EltType) {
6698 SDValue ExtValue = Op->getOperand(0);
6699 unsigned NumElts = Op->getNumOperands();
6700 unsigned Delta = NumElts;
6701
6702 for (unsigned i = 1; i < NumElts; i++) {
6703 if (Op->getOperand(i) == ExtValue) {
6704 Delta = i;
6705 break;
6706 }
6707 if (!(Op->getOperand(i).isUndef() || isNullConstant(Op->getOperand(i))))
6708 return SDValue();
6709 }
6710 if (!isPowerOf2_32(Delta) || Delta == 1)
6711 return SDValue();
6712
6713 for (unsigned i = Delta; i < NumElts; i++) {
6714 if (i % Delta == 0) {
6715 if (Op->getOperand(i) != ExtValue)
6716 return SDValue();
6717 } else if (!(isNullConstant(Op->getOperand(i)) ||
6718 Op->getOperand(i).isUndef()))
6719 return SDValue();
6720 }
6721 unsigned EltSize = Op->getSimpleValueType(0).getScalarSizeInBits();
6722 unsigned ExtVTSize = EltSize * Delta;
6723 EltType = MVT::getIntegerVT(ExtVTSize);
6724 NumElt = NumElts / Delta;
6725 return ExtValue;
6726}
6727
6728/// Attempt to use the vbroadcast instruction to generate a splat value
6729/// from a splat BUILD_VECTOR which uses:
6730/// a. A single scalar load, or a constant.
6731/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
6732///
6733/// The VBROADCAST node is returned when a pattern is found,
6734/// or SDValue() otherwise.
6735static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
6736 const X86Subtarget &Subtarget,
6737 SelectionDAG &DAG) {
6738 // VBROADCAST requires AVX.
6739 // TODO: Splats could be generated for non-AVX CPUs using SSE
6740 // instructions, but there's less potential gain for only 128-bit vectors.
6741 if (!Subtarget.hasAVX())
6742 return SDValue();
6743
6744 MVT VT = BVOp->getSimpleValueType(0);
6745 SDLoc dl(BVOp);
6746
6747 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Unsupported vector type for broadcast.") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Unsupported vector type for broadcast.\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6748, __PRETTY_FUNCTION__))
6748 "Unsupported vector type for broadcast.")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Unsupported vector type for broadcast.") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Unsupported vector type for broadcast.\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6748, __PRETTY_FUNCTION__))
;
6749
6750 BitVector UndefElements;
6751 SDValue Ld = BVOp->getSplatValue(&UndefElements);
6752
6753 // Attempt to use VBROADCASTM
6754 // From this paterrn:
6755 // a. t0 = (zext_i64 (bitcast_i8 v2i1 X))
6756 // b. t1 = (build_vector t0 t0)
6757 //
6758 // Create (VBROADCASTM v2i1 X)
6759 if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) {
6760 MVT EltType = VT.getScalarType();
6761 unsigned NumElts = VT.getVectorNumElements();
6762 SDValue BOperand;
6763 SDValue ZeroExtended = isSplatZeroExtended(BVOp, NumElts, EltType);
6764 if ((ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) ||
6765 (Ld && Ld.getOpcode() == ISD::ZERO_EXTEND &&
6766 Ld.getOperand(0).getOpcode() == ISD::BITCAST)) {
6767 if (ZeroExtended)
6768 BOperand = ZeroExtended.getOperand(0);
6769 else
6770 BOperand = Ld.getOperand(0).getOperand(0);
6771 if (BOperand.getValueType().isVector() &&
6772 BOperand.getSimpleValueType().getVectorElementType() == MVT::i1) {
6773 if ((EltType == MVT::i64 && (VT.getVectorElementType() == MVT::i8 ||
6774 NumElts == 8)) || // for broadcastmb2q
6775 (EltType == MVT::i32 && (VT.getVectorElementType() == MVT::i16 ||
6776 NumElts == 16))) { // for broadcastmw2d
6777 SDValue Brdcst =
6778 DAG.getNode(X86ISD::VBROADCASTM, dl,
6779 MVT::getVectorVT(EltType, NumElts), BOperand);
6780 return DAG.getBitcast(VT, Brdcst);
6781 }
6782 }
6783 }
6784 }
6785
6786 // We need a splat of a single value to use broadcast, and it doesn't
6787 // make any sense if the value is only in one element of the vector.
6788 if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
6789 APInt SplatValue, Undef;
6790 unsigned SplatBitSize;
6791 bool HasUndef;
6792 // Check if this is a repeated constant pattern suitable for broadcasting.
6793 if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
6794 SplatBitSize > VT.getScalarSizeInBits() &&
6795 SplatBitSize < VT.getSizeInBits()) {
6796 // Avoid replacing with broadcast when it's a use of a shuffle
6797 // instruction to preserve the present custom lowering of shuffles.
6798 if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
6799 return SDValue();
6800 // replace BUILD_VECTOR with broadcast of the repeated constants.
6801 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6802 LLVMContext *Ctx = DAG.getContext();
6803 MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
6804 if (Subtarget.hasAVX()) {
6805 if (SplatBitSize <= 64 && Subtarget.hasAVX2() &&
6806 !(SplatBitSize == 64 && Subtarget.is32Bit())) {
6807 // Splatted value can fit in one INTEGER constant in constant pool.
6808 // Load the constant and broadcast it.
6809 MVT CVT = MVT::getIntegerVT(SplatBitSize);
6810 Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
6811 Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
6812 SDValue CP = DAG.getConstantPool(C, PVT);
6813 unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
6814
6815 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6816 Ld = DAG.getLoad(
6817 CVT, dl, DAG.getEntryNode(), CP,
6818 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6819 Alignment);
6820 SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
6821 MVT::getVectorVT(CVT, Repeat), Ld);
6822 return DAG.getBitcast(VT, Brdcst);
6823 } else if (SplatBitSize == 32 || SplatBitSize == 64) {
6824 // Splatted value can fit in one FLOAT constant in constant pool.
6825 // Load the constant and broadcast it.
6826 // AVX have support for 32 and 64 bit broadcast for floats only.
6827 // No 64bit integer in 32bit subtarget.
6828 MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
6829 // Lower the splat via APFloat directly, to avoid any conversion.
6830 Constant *C =
6831 SplatBitSize == 32
6832 ? ConstantFP::get(*Ctx,
6833 APFloat(APFloat::IEEEsingle(), SplatValue))
6834 : ConstantFP::get(*Ctx,
6835 APFloat(APFloat::IEEEdouble(), SplatValue));
6836 SDValue CP = DAG.getConstantPool(C, PVT);
6837 unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
6838
6839 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6840 Ld = DAG.getLoad(
6841 CVT, dl, DAG.getEntryNode(), CP,
6842 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6843 Alignment);
6844 SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
6845 MVT::getVectorVT(CVT, Repeat), Ld);
6846 return DAG.getBitcast(VT, Brdcst);
6847 } else if (SplatBitSize > 64) {
6848 // Load the vector of constants and broadcast it.
6849 MVT CVT = VT.getScalarType();
6850 Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
6851 *Ctx);
6852 SDValue VCP = DAG.getConstantPool(VecC, PVT);
6853 unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
6854 unsigned Alignment = cast<ConstantPoolSDNode>(VCP)->getAlignment();
6855 Ld = DAG.getLoad(
6856 MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
6857 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6858 Alignment);
6859 SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
6860 return DAG.getBitcast(VT, Brdcst);
6861 }
6862 }
6863 }
6864 return SDValue();
6865 }
6866
6867 bool ConstSplatVal =
6868 (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
6869
6870 // Make sure that all of the users of a non-constant load are from the
6871 // BUILD_VECTOR node.
6872 if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
6873 return SDValue();
6874
6875 unsigned ScalarSize = Ld.getValueSizeInBits();
6876 bool IsGE256 = (VT.getSizeInBits() >= 256);
6877
6878 // When optimizing for size, generate up to 5 extra bytes for a broadcast
6879 // instruction to save 8 or more bytes of constant pool data.
6880 // TODO: If multiple splats are generated to load the same constant,
6881 // it may be detrimental to overall size. There needs to be a way to detect
6882 // that condition to know if this is truly a size win.
6883 bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
6884
6885 // Handle broadcasting a single constant scalar from the constant pool
6886 // into a vector.
6887 // On Sandybridge (no AVX2), it is still better to load a constant vector
6888 // from the constant pool and not to broadcast it from a scalar.
6889 // But override that restriction when optimizing for size.
6890 // TODO: Check if splatting is recommended for other AVX-capable CPUs.
6891 if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
6892 EVT CVT = Ld.getValueType();
6893 assert(!CVT.isVector() && "Must not broadcast a vector type")((!CVT.isVector() && "Must not broadcast a vector type"
) ? static_cast<void> (0) : __assert_fail ("!CVT.isVector() && \"Must not broadcast a vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6893, __PRETTY_FUNCTION__))
;
6894
6895 // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
6896 // For size optimization, also splat v2f64 and v2i64, and for size opt
6897 // with AVX2, also splat i8 and i16.
6898 // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
6899 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
6900 (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
6901 const Constant *C = nullptr;
6902 if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
6903 C = CI->getConstantIntValue();
6904 else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
6905 C = CF->getConstantFPValue();
6906
6907 assert(C && "Invalid constant type")((C && "Invalid constant type") ? static_cast<void
> (0) : __assert_fail ("C && \"Invalid constant type\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6907, __PRETTY_FUNCTION__))
;
6908
6909 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6910 SDValue CP =
6911 DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
6912 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6913 Ld = DAG.getLoad(
6914 CVT, dl, DAG.getEntryNode(), CP,
6915 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6916 Alignment);
6917
6918 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6919 }
6920 }
6921
6922 bool IsLoad = ISD::isNormalLoad(Ld.getNode());
6923
6924 // Handle AVX2 in-register broadcasts.
6925 if (!IsLoad && Subtarget.hasInt256() &&
6926 (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
6927 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6928
6929 // The scalar source must be a normal load.
6930 if (!IsLoad)
6931 return SDValue();
6932
6933 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
6934 (Subtarget.hasVLX() && ScalarSize == 64))
6935 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6936
6937 // The integer check is needed for the 64-bit into 128-bit so it doesn't match
6938 // double since there is no vbroadcastsd xmm
6939 if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) {
6940 if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
6941 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6942 }
6943
6944 // Unsupported broadcast.
6945 return SDValue();
6946}
6947
6948/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
6949/// underlying vector and index.
6950///
6951/// Modifies \p ExtractedFromVec to the real vector and returns the real
6952/// index.
6953static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
6954 SDValue ExtIdx) {
6955 int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
6956 if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
6957 return Idx;
6958
6959 // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
6960 // lowered this:
6961 // (extract_vector_elt (v8f32 %vreg1), Constant<6>)
6962 // to:
6963 // (extract_vector_elt (vector_shuffle<2,u,u,u>
6964 // (extract_subvector (v8f32 %vreg0), Constant<4>),
6965 // undef)
6966 // Constant<0>)
6967 // In this case the vector is the extract_subvector expression and the index
6968 // is 2, as specified by the shuffle.
6969 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
6970 SDValue ShuffleVec = SVOp->getOperand(0);
6971 MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
6972 assert(ShuffleVecVT.getVectorElementType() ==((ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType
().getVectorElementType()) ? static_cast<void> (0) : __assert_fail
("ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType().getVectorElementType()"
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6973, __PRETTY_FUNCTION__))
6973 ExtractedFromVec.getSimpleValueType().getVectorElementType())((ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType
().getVectorElementType()) ? static_cast<void> (0) : __assert_fail
("ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType().getVectorElementType()"
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 6973, __PRETTY_FUNCTION__))
;
6974
6975 int ShuffleIdx = SVOp->getMaskElt(Idx);
6976 if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
6977 ExtractedFromVec = ShuffleVec;
6978 return ShuffleIdx;
6979 }
6980 return Idx;
6981}
6982
6983static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
6984 MVT VT = Op.getSimpleValueType();
6985
6986 // Skip if insert_vec_elt is not supported.
6987 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6988 if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
6989 return SDValue();
6990
6991 SDLoc DL(Op);
6992 unsigned NumElems = Op.getNumOperands();
6993
6994 SDValue VecIn1;
6995 SDValue VecIn2;
6996 SmallVector<unsigned, 4> InsertIndices;
6997 SmallVector<int, 8> Mask(NumElems, -1);
6998
6999 for (unsigned i = 0; i != NumElems; ++i) {
7000 unsigned Opc = Op.getOperand(i).getOpcode();
7001
7002 if (Opc == ISD::UNDEF)
7003 continue;
7004
7005 if (Opc != ISD::EXTRACT_VECTOR_ELT) {
7006 // Quit if more than 1 elements need inserting.
7007 if (InsertIndices.size() > 1)
7008 return SDValue();
7009
7010 InsertIndices.push_back(i);
7011 continue;
7012 }
7013
7014 SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
7015 SDValue ExtIdx = Op.getOperand(i).getOperand(1);
7016
7017 // Quit if non-constant index.
7018 if (!isa<ConstantSDNode>(ExtIdx))
7019 return SDValue();
7020 int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
7021
7022 // Quit if extracted from vector of different type.
7023 if (ExtractedFromVec.getValueType() != VT)
7024 return SDValue();
7025
7026 if (!VecIn1.getNode())
7027 VecIn1 = ExtractedFromVec;
7028 else if (VecIn1 != ExtractedFromVec) {
7029 if (!VecIn2.getNode())
7030 VecIn2 = ExtractedFromVec;
7031 else if (VecIn2 != ExtractedFromVec)
7032 // Quit if more than 2 vectors to shuffle
7033 return SDValue();
7034 }
7035
7036 if (ExtractedFromVec == VecIn1)
7037 Mask[i] = Idx;
7038 else if (ExtractedFromVec == VecIn2)
7039 Mask[i] = Idx + NumElems;
7040 }
7041
7042 if (!VecIn1.getNode())
7043 return SDValue();
7044
7045 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
7046 SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
7047
7048 for (unsigned Idx : InsertIndices)
7049 NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
7050 DAG.getIntPtrConstant(Idx, DL));
7051
7052 return NV;
7053}
7054
7055static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
7056 assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&((ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"
) ? static_cast<void> (0) : __assert_fail ("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 7058, __PRETTY_FUNCTION__))
7057 Op.getScalarValueSizeInBits() == 1 &&((ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"
) ? static_cast<void> (0) : __assert_fail ("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 7058, __PRETTY_FUNCTION__))
7058 "Can not convert non-constant vector")((ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"
) ? static_cast<void> (0) : __assert_fail ("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 7058, __PRETTY_FUNCTION__))
;
7059 uint64_t Immediate = 0;
7060 for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
7061 SDValue In = Op.getOperand(idx);
7062 if (!In.isUndef())
7063 Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
7064 }
7065 SDLoc dl(Op);
7066 MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8));
7067 return DAG.getConstant(Immediate, dl, VT);
7068}
7069// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
7070SDValue
7071X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
7072
7073 MVT VT = Op.getSimpleValueType();
7074 assert((VT.getVectorElementType() == MVT::i1) &&(((VT.getVectorElementType() == MVT::i1) && "Unexpected type in LowerBUILD_VECTORvXi1!"
) ? static_cast<void> (0) : __assert_fail ("(VT.getVectorElementType() == MVT::i1) && \"Unexpected type in LowerBUILD_VECTORvXi1!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 7075, __PRETTY_FUNCTION__))
7075 "Unexpected type in LowerBUILD_VECTORvXi1!")(((VT.getVectorElementType() == MVT::i1) && "Unexpected type in LowerBUILD_VECTORvXi1!"
) ? static_cast<void> (0) : __assert_fail ("(VT.getVectorElementType() == MVT::i1) && \"Unexpected type in LowerBUILD_VECTORvXi1!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 7075, __PRETTY_FUNCTION__))
;
7076
7077 SDLoc dl(Op);
7078 if (ISD::isBuildVectorAllZeros(Op.getNode()))
7079 return DAG.getTargetConstant(0, dl, VT);
7080
7081 if (ISD::isBuildVectorAllOnes(Op.getNode()))
7082 return DAG.getTargetConstant(1, dl, VT);
7083
7084 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
7085 if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
7086 // Split the pieces.
7087 SDValue Lower =
7088 DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(0, 32));
7089 SDValue Upper =
7090 DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32));
7091 // We have to manually lower both halves so getNode doesn't try to
7092 // reassemble the build_vector.
7093 Lower = LowerBUILD_VECTORvXi1(Lower, DAG);
7094 Upper = LowerBUILD_VECTORvXi1(Upper, DAG);
7095 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper);
7096 }
7097 SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
7098 if (Imm.getValueSizeInBits() == VT.getSizeInBits())
7099 return DAG.getBitcast(VT, Imm);
7100 SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
7101 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
7102 DAG.getIntPtrConstant(0, dl));
7103 }
7104
7105 // Vector has one or more non-const elements
7106 uint64_t Immediate = 0;
7107 SmallVector<unsigned, 16> NonConstIdx;
7108 bool IsSplat = true;
7109 bool HasConstElts = false;
7110 int SplatIdx = -1;
7111 for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
7112 SDValue In = Op.getOperand(idx);
7113 if (In.isUndef())
7114 continue;
7115 if (!isa<ConstantSDNode>(In))
7116 NonConstIdx.push_back(idx);
7117 else {
7118 Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
7119 HasConstElts = true;
7120 }
7121 if (SplatIdx < 0)
7122 SplatIdx = idx;
7123 else if (In != Op.getOperand(SplatIdx))
7124 IsSplat = false;
7125 }
7126
7127 // for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
7128 if (IsSplat)
7129 return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx),
7130 DAG.getConstant(1, dl, VT),
7131 DAG.getConstant(0, dl, VT));
7132
7133 // insert elements one by one
7134 SDValue DstVec;
7135 SDValue Imm;
7136 if (Immediate) {
7137 MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
7138 Imm = DAG.getConstant(Immediate, dl, ImmVT);
7139 }
7140 else if (HasConstElts)
7141 Imm = DAG.getConstant(0, dl, VT);
7142 else
7143 Imm = DAG.getUNDEF(VT);
7144 if (Imm.getValueSizeInBits() == VT.getSizeInBits())
7145 DstVec = DAG.getBitcast(VT, Imm);
7146 else {
7147 SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
7148 DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
7149 DAG.getIntPtrConstant(0, dl));
7150 }
7151
7152 for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
7153 unsigned InsertIdx = NonConstIdx[i];
7154 DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
7155 Op.getOperand(InsertIdx),
7156 DAG.getIntPtrConstant(InsertIdx, dl));
7157 }
7158 return DstVec;
7159}
7160
7161/// \brief Return true if \p N implements a horizontal binop and return the
7162/// operands for the horizontal binop into V0 and V1.
7163///
7164/// This is a helper function of LowerToHorizontalOp().
7165/// This function checks that the build_vector \p N in input implements a
7166/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
7167/// operation to match.
7168/// For example, if \p Opcode is equal to ISD::ADD, then this function
7169/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
7170/// is equal to ISD::SUB, then this function checks if this is a horizontal
7171/// arithmetic sub.
7172///
7173/// This function only analyzes elements of \p N whose indices are
7174/// in range [BaseIdx, LastIdx).
7175static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
7176 SelectionDAG &DAG,
7177 unsigned BaseIdx, unsigned LastIdx,
7178 SDValue &V0, SDValue &V1) {
7179 EVT VT = N->getValueType(0);
7180
7181 assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!")((BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"
) ? static_cast<void> (0) : __assert_fail ("BaseIdx * 2 <= LastIdx && \"Invalid Indices in input!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 7181, __PRETTY_FUNCTION__))
;
7182 assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&((VT.isVector() && VT.getVectorNumElements() >= LastIdx
&& "Invalid Vector in input!") ? static_cast<void
> (0) : __assert_fail ("VT.isVector() && VT.getVectorNumElements() >= LastIdx && \"Invalid Vector in input!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 7183, __PRETTY_FUNCTION__))
7183 "Invalid Vector in input!")((VT.isVector() && VT.getVectorNumElements() >= LastIdx
&& "Invalid Vector in input!") ? static_cast<void
> (0) : __assert_fail ("VT.isVector() && VT.getVectorNumElements() >= LastIdx && \"Invalid Vector in input!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 7183, __PRETTY_FUNCTION__))
;
7184
7185 bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
7186 bool CanFold = true;
7187 unsigned ExpectedVExtractIdx = BaseIdx;
7188 unsigned NumElts = LastIdx - BaseIdx;
7189 V0 = DAG.getUNDEF(VT);
7190 V1 = DAG.getUNDEF(VT);
7191
7192 // Check if N implements a horizontal binop.
7193 for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
7194 SDValue Op = N->getOperand(i + BaseIdx);
7195
7196 // Skip UNDEFs.
7197 if (Op->isUndef()) {
7198 // Update the expected vector extract index.
7199 if (i * 2 == NumElts)
7200 ExpectedVExtractIdx = BaseIdx;
7201 ExpectedVExtractIdx += 2;
7202 continue;
7203 }
7204
7205 CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
7206
7207 if (!CanFold)
7208 break;
7209
7210 SDValue Op0 = Op.getOperand(0);
7211 SDValue Op1 = Op.getOperand(1);
7212
7213 // Try to match the following pattern:
7214 // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
7215 CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7216 Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7217 Op0.getOperand(0) == Op1.getOperand(0) &&
7218 isa<ConstantSDNode>(Op0.getOperand(1)) &&
7219 isa<ConstantSDNode>(Op1.getOperand(1)));
7220 if (!CanFold)
7221 break;
7222
7223 unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
7224 unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
7225
7226 if (i * 2 < NumElts) {
7227 if (V0.isUndef()) {
7228 V0 = Op0.getOperand(0);
7229 if (V0.getValueType() != VT)
7230 return false;
7231 }
7232 } else {
7233 if (V1.isUndef()) {
7234 V1 = Op0.getOperand(0);
7235 if (V1.getValueType() != VT)
7236 return false;
7237 }
7238 if (i * 2 == NumElts)
7239 ExpectedVExtractIdx = BaseIdx;
7240 }
7241
7242 SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
7243 if (I0 == ExpectedVExtractIdx)
7244 CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
7245 else if (IsCommutable && I1 == ExpectedVExtractIdx) {
7246 // Try to match the following dag sequence:
7247 // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
7248 CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
7249 } else
7250 CanFold = false;
7251
7252 ExpectedVExtractIdx += 2;
7253 }
7254
7255 return CanFold;
7256}
7257
7258/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
7259/// a concat_vector.
7260///
7261/// This is a helper function of LowerToHorizontalOp().
7262/// This function expects two 256-bit vectors called V0 and V1.
7263/// At first, each vector is split into two separate 128-bit vectors.
7264/// Then, the resulting 128-bit vectors are used to implement two
7265/// horizontal binary operations.
7266///
7267/// The kind of horizontal binary operation is defined by \p X86Opcode.
7268///
7269/// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to
7270/// the two new horizontal binop.
7271/// When Mode is set, the first horizontal binop dag node would take as input
7272/// the lower 128-bit of V0 and the upper 128-bit of V0. The second
7273/// horizontal binop dag node would take as input the lower 128-bit of V1
7274/// and the upper 128-bit of V1.
7275/// Example:
7276/// HADD V0_LO, V0_HI
7277/// HADD V1_LO, V1_HI
7278///
7279/// Otherwise, the first horizontal binop dag node takes as input the lower
7280/// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
7281/// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
7282/// Example:
7283/// HADD V0_LO, V1_LO
7284/// HADD V0_HI, V1_HI
7285///
7286/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
7287/// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
7288/// the upper 128-bits of the result.
7289static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
7290 const SDLoc &DL, SelectionDAG &DAG,
7291 unsigned X86Opcode, bool Mode,
7292 bool isUndefLO, bool isUndefHI) {
7293 MVT VT = V0.getSimpleValueType();
7294 assert(VT.is256BitVector() && VT == V1.getSimpleValueType() &&((VT.is256BitVector() && VT == V1.getSimpleValueType(
) && "Invalid nodes in input!") ? static_cast<void
> (0) : __assert_fail ("VT.is256BitVector() && VT == V1.getSimpleValueType() && \"Invalid nodes in input!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 7295, __PRETTY_FUNCTION__))
7295 "Invalid nodes in input!")((VT.is256BitVector() && VT == V1.getSimpleValueType(
) && "Invalid nodes in input!") ? static_cast<void
> (0) : __assert_fail ("VT.is256BitVector() && VT == V1.getSimpleValueType() && \"Invalid nodes in input!\""
, "/build/llvm-toolchain-snapshot-6.0~svn318211/lib/Target/X86/X86ISelLowering.cpp"
, 7295, __PRETTY_FUNCTION__))
;
7296
7297 unsigned NumElts = VT.getVectorNumElements();
7298 SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
7299 SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
7300 SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
7301 SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
7302 MVT NewVT = V0_LO.getSimpleValueType();
7303
7304 SDValue LO = DAG.getUNDEF(NewVT);
7305 SDValue HI = DAG.getUNDEF(NewVT);
7306
7307 if (Mode) {
7308 // Don't emit a horizontal binop if the result is expected to be UNDEF.
7309 if (!isUndefLO && !V0->isUndef())
7310 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
7311 if (!isUndefHI && !V1->isUndef())
7312 HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
7313 } else {
7314 // Don't emit a horizontal binop if the result is expected to be UNDEF.
7315 if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef()))
7316 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
7317
7318 if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef()))
7319 HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
7320 }
7321
7322 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
7323}
7324
7325/// Returns true iff \p BV builds a vector with the result equivalent to
7326/// the result of ADDSUB operation.
7327/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation
7328/// are written to the parameters \p Opnd0 and \p Opnd1.
7329static bool isAddSub(const BuildVectorSDNode *BV,
7330 const X86Subtarget &Subtarget, SelectionDAG &DAG,
7331 SDValue &Opnd0, SDValue &Opnd1) {
7332
7333 MVT VT = BV->getSimpleValueType(0);
7334 if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
7335 (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
7336 (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
7337 return false;
7338
7339 unsigned NumElts = VT.getVectorNumElements();
7340 SDValue InVec0 = DAG.getUNDEF(VT);
7341 SDValue InVec1 = DAG.getUNDEF(VT);
7342
7343 // Odd-numbered elements in the input build vector are obtained from
7344 // adding two integer/float elements.
7345 // Even-numbered elements in the input build vector are obtained from
7346 // subtracting two integer/float elements.
7347 unsigned ExpectedOpcode = ISD::FSUB;
7348 unsigned NextExpectedOpcode = ISD::FADD;
7349 bool AddFound = false;
7350 bool SubFound = false;
7351
7352 for (unsigned i = 0, e = NumElts; i != e; ++i) {
7353 SDValue Op = BV->getOperand(i);
7354
7355 // Skip 'undef' values.
7356 unsigned Opcode = Op.getOpcode();
7357 if (Opcode == ISD::UNDEF) {
7358 std::swap(ExpectedOpcode, NextExpectedOpcode);
7359 continue;
7360 }
7361
7362 // Early exit if we found an unexpected opcode.
7363 if (Opcode != ExpectedOpcode)
7364 return false;
7365
7366 SDValue Op0 = Op.getOperand(0);
7367 SDValue Op1 = Op.getOperand(1);
7368
7369 // Try to match the following pattern:
7370 // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
7371 // Early exit if we cannot match that sequence.
7372 if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7373 Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7374 !isa<ConstantSDNode>(Op0.getOperand(1)) ||
7375 !isa<ConstantSDNode>(Op1.getOperand(1)) ||
7376 Op0.getOperand(1) != Op1.getOperand(1))
7377 return false;
7378
7379 unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
7380 if (I0 != i)
7381 return false;
7382
7383 // We found a valid add/sub node. Update the information accordingly.
7384 if (i & 1)
7385 AddFound = true;
7386 else
7387 SubFound = true;
7388
7389 // Update InVec0 and InVec1.
7390 if (InVec0.isUndef()) {
7391 InVec0 = Op0.getOperand(0);
7392 if (InVec0.getSimpleValueType() != VT)
7393 return false;
7394 }
7395 if (InVec1.isUndef()) {
7396 InVec1 = Op1.getOperand(0);
7397 if (InVec1.getSimpleValueType() != VT)
7398 return false;
7399 }
7400
7401 // Make sure that operands in input to each add/sub node always
7402 // come from a same pair of vectors.
7403 if (InVec0 != Op0.getOperand(0)) {
7404 if (ExpectedOpcode == ISD::FSUB)
7405 return false;
7406
7407 // FADD is commutable. Try to commute the operands
7408 // and then test again.
7409 std::swap(Op0, Op1);
7410 if (InVec0 != Op0.getOperand(0))
7411 return false;
7412 }
7413
7414 if (InVec1 != Op1.getOperand(0))
7415 return false;
7416
7417 // Update the pair of expected opcodes.
7418 std::swap(ExpectedOpcode, NextExpectedOpcode);
7419 }
7420
7421 // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
7422 if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
7423 return false;
7424
7425 Opnd0 = InVec0;
7426 Opnd1 = InVec1;
7427 return true;
7428}
7429
7430/// Returns true if is possible to fold MUL and an idiom that has already been
7431/// recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
7432/// If (and only if) true is returned, the operands of FMADDSUB are written to
7433/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
7434///
7435/// Prior to calling this function it should be known that there is some
7436/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
7437/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
7438/// before replacement of such SDNode with ADDSUB operation. Thus the number
7439/// of \p Opnd0 uses is expected to be equal to 2.
7440/// For example, this function may be called for the following IR:
7441/// %AB = fmul fast <2 x double> %A, %B
7442/// %Sub = fsub fast <2 x double> %AB, %C
7443/// %Add = fadd fast <2 x double> %AB, %C
7444/// %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
7445/// <2 x i32> <i32 0, i32 3>
7446/// There is a def for %Addsub here, which potentially can be replaced by
7447/// X86ISD::ADDSUB operation:
7448/// %Addsub = X86ISD::ADDSUB %AB, %C
7449/// and such ADDSUB can further be replaced with FMADDSUB:
7450/// %Addsub = FMADDSUB %A, %B, %C.
7451///
7452/// The main reason why this method is called before the replacement of the
7453/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
7454/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
7455/// FMADDSUB is.
7456static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
7457 SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {
7458 if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||
7459 !Subtarget.hasAnyFMA())
7460 return false;
7461
7462 // FIXME: These checks must match the similar ones in
7463 // DAGCombiner::visitFADDForFMACombine. It would be good to have one
7464 // function that would answer if it is Ok to fuse MUL + ADD to FMADD
7465 // or MUL + ADDSUB to FMADDSUB.
7466 const TargetOptions &Options = DAG.getTarget().Options;
7467 bool AllowFusion =
7468 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
7469 if (!AllowFusion)
7470 return false;
7471
7472 Opnd2 = Opnd1;
7473 Opnd1 = Opnd0.getOperand(1);
7474 Opnd0 = Opnd0.getOperand(0);
7475
7476 return true;
7477}
7478
7479/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' operation
7480/// accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB node.
7481static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
7482 const X86Subtarget &Subtarget,
7483 SelectionDAG &DAG) {
7484 SDValue Opnd0, Opnd1;
7485 if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))
7486 return SDValue();
7487
7488 MVT VT = BV->getSimpleValueType(0);
7489 SDLoc DL(BV);
7490
7491 // Try to generate X86ISD::FMADDSUB node here.
7492 SDValue Opnd2;
7493 if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
7494 return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
7495
7496 // Do not generate X86ISD::ADDSUB node for 512-bit types even though
7497 // the ADDSUB idiom has been successfully recognized. There are no known
7498 // X86 targets with 512-bit ADDSUB instructions!
7499 // 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
7500 // recognition.
7501 if (VT.is512BitVector())
7502 return SDValue();
7503
7504 return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
7505}
7506
7507/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
7508static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
7509 const X86Subtarget &Subtarget,
7510 SelectionDAG &DAG) {
7511 MVT VT = BV->getSimpleValueType(0);
7512 unsigned NumElts = VT.getVectorNumElements();
7513 unsigned NumUndefsLO = 0;
7514 unsigned NumUndefsHI = 0;
7515 unsigned Half = NumElts/2;
7516
7517 // Count the number of UNDEF operands in the build_vector in input.
7518 for (unsigned i = 0, e = Half; i != e; ++i)
7519 if (BV->getOperand(i)->isUndef())
7520 NumUndefsLO++;
7521
7522 for (unsigned i = Half, e = NumElts; i != e; ++i)
7523 if (BV->getOperand(i)->isUndef())
7524 NumUndefsHI++;
7525
7526 // Early exit if this is either a build_vector of all UNDEFs or all the
7527 // operands but one are UNDEF.
7528 if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
7529 return SDValue();
7530
7531 SDLoc DL(BV);
7532 SDValue InVec0, InVec1;
7533 if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) {
7534 // Try to match an SSE3 float HADD/HSUB.
7535 if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
7536 return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
7537
7538 if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
7539 return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
7540 } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) {
7541 // Try to match an SSSE3 integer HADD/HSUB.
7542 if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
7543 return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
7544
7545 if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
7546 return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
7547 }
7548
7549 if (!Subtarget.hasAVX())
7550 return SDValue();
7551
7552 if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
7553 // Try to match an AVX horizontal add/sub of packed single/double
7554 // precision floating point values from 256-bit vectors.
7555 SDValue InVec2, InVec3;
7556 if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
7557 isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
7558 ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
7559 ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
7560 return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
7561
7562 if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
7563 isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
7564 ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
7565 ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
7566 return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
7567 } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
7568 // Try to match an AVX2 horizontal add/sub of signed integers.
7569 SDValue InVec2, InVec3;
7570 unsigned X86Opcode;
7571 bool CanFold = true;
7572
7573