Bug Summary

File: lib/Target/X86/X86ISelLowering.cpp
Warning: line 125, column 3
Potential memory leak

Annotated Source Code

/build/llvm-toolchain-snapshot-6.0~svn318801/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86ISelLowering.h"
16#include "Utils/X86ShuffleDecode.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86ShuffleDecodeConstantPool.h"
23#include "X86TargetMachine.h"
24#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallBitVector.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/CodeGen/IntrinsicLowering.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineJumpTableInfo.h"
36#include "llvm/CodeGen/MachineModuleInfo.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/WinEHFuncInfo.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/DiagnosticInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalAlias.h"
47#include "llvm/IR/GlobalVariable.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Intrinsics.h"
50#include "llvm/MC/MCAsmInfo.h"
51#include "llvm/MC/MCContext.h"
52#include "llvm/MC/MCExpr.h"
53#include "llvm/MC/MCSymbol.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/MathExtras.h"
59#include "llvm/Target/TargetOptions.h"
60#include <algorithm>
61#include <bitset>
62#include <cctype>
63#include <numeric>
64using namespace llvm;
65
66#define DEBUG_TYPE "x86-isel"
67
68STATISTIC(NumTailCalls, "Number of tail calls");
69
70static cl::opt<bool> ExperimentalVectorWideningLegalization(
71 "x86-experimental-vector-widening-legalization", cl::init(false),
72 cl::desc("Enable an experimental vector type legalization through widening "
73 "rather than promotion."),
74 cl::Hidden);
75
76static cl::opt<int> ExperimentalPrefLoopAlignment(
77 "x86-experimental-pref-loop-alignment", cl::init(4),
78 cl::desc("Sets the preferable loop alignment for experiments "
79 "(the last x86-experimental-pref-loop-alignment bits"
80 " of the loop header PC will be 0)."),
81 cl::Hidden);
82
83static cl::opt<bool> MulConstantOptimization(
84 "mul-constant-optimization", cl::init(true),
85 cl::desc("Replace 'mul x, Const' with more effective instructions like "
86 "SHIFT, LEA, etc."),
87 cl::Hidden);
88
89/// Call this when the user attempts to do something unsupported, like
90/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
91/// report_fatal_error, so calling code should attempt to recover without
92/// crashing.
93static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
94 const char *Msg) {
95 MachineFunction &MF = DAG.getMachineFunction();
96 DAG.getContext()->diagnose(
97 DiagnosticInfoUnsupported(*MF.getFunction(), Msg, dl.getDebugLoc()));
98}
99
100X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
101 const X86Subtarget &STI)
102 : TargetLowering(TM), Subtarget(STI) {
103 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
104 X86ScalarSSEf64 = Subtarget.hasSSE2();
105 X86ScalarSSEf32 = Subtarget.hasSSE1();
106 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
107
108 // Set up the TargetLowering object.
109
110 // X86 is weird. It always uses i8 for shift amounts and setcc results.
111 setBooleanContents(ZeroOrOneBooleanContent);
112 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
113 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
114
115 // For 64-bit, since we have so many registers, use the ILP scheduler.
116 // For 32-bit, use the register pressure specific scheduling.
117 // For Atom, always use ILP scheduling.
118 if (Subtarget.isAtom())
119 setSchedulingPreference(Sched::ILP);
120 else if (Subtarget.is64Bit())
121 setSchedulingPreference(Sched::ILP);
122 else
123 setSchedulingPreference(Sched::RegPressure);
124 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
125 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
126
127 // Bypass expensive divides and use cheaper ones.
128 if (TM.getOptLevel() >= CodeGenOpt::Default) {
129 if (Subtarget.hasSlowDivide32())
130 addBypassSlowDiv(32, 8);
131 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
132 addBypassSlowDiv(64, 32);
133 }
134
135 if (Subtarget.isTargetKnownWindowsMSVC() ||
136 Subtarget.isTargetWindowsItanium()) {
137 // Setup Windows compiler runtime calls.
138 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
139 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
140 setLibcallName(RTLIB::SREM_I64, "_allrem");
141 setLibcallName(RTLIB::UREM_I64, "_aullrem");
142 setLibcallName(RTLIB::MUL_I64, "_allmul");
143 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
144 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
145 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
146 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
147 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
148 }
149
150 if (Subtarget.isTargetDarwin()) {
151 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
152 setUseUnderscoreSetJmp(false);
153 setUseUnderscoreLongJmp(false);
154 } else if (Subtarget.isTargetWindowsGNU()) {
155 // MS runtime is weird: it exports _setjmp, but longjmp!
156 setUseUnderscoreSetJmp(true);
157 setUseUnderscoreLongJmp(false);
158 } else {
159 setUseUnderscoreSetJmp(true);
160 setUseUnderscoreLongJmp(true);
161 }
162
163 // Set up the register classes.
164 addRegisterClass(MVT::i8, &X86::GR8RegClass);
165 addRegisterClass(MVT::i16, &X86::GR16RegClass);
166 addRegisterClass(MVT::i32, &X86::GR32RegClass);
167 if (Subtarget.is64Bit())
168 addRegisterClass(MVT::i64, &X86::GR64RegClass);
169
170 for (MVT VT : MVT::integer_valuetypes())
171 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
172
173 // We don't accept any truncstore of integer registers.
174 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
175 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
176 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
177 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
178 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
179 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
180
181 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
182
183 // SETOEQ and SETUNE require checking two conditions.
184 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
185 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
186 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
187 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
188 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
189 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
190
191 // Integer absolute.
192 if (Subtarget.hasCMov()) {
193 setOperationAction(ISD::ABS , MVT::i16 , Custom);
194 setOperationAction(ISD::ABS , MVT::i32 , Custom);
195 if (Subtarget.is64Bit())
196 setOperationAction(ISD::ABS , MVT::i64 , Custom);
197 }
198
199 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
200 // operation.
201 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
202 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
203 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
204
205 if (Subtarget.is64Bit()) {
206 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
207 // f32/f64 are legal, f80 is custom.
208 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
209 else
210 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
211 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
212 } else if (!Subtarget.useSoftFloat()) {
213 // We have an algorithm for SSE2->double, and we turn this into a
214 // 64-bit FILD followed by conditional FADD for other targets.
215 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
216 // We have an algorithm for SSE2, and we turn this into a 64-bit
217 // FILD or VCVTUSI2SS/SD for other targets.
218 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
219 }
220
221 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
222 // this operation.
223 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
224 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
225
226 if (!Subtarget.useSoftFloat()) {
227 // SSE has no i16 to fp conversion, only i32.
228 if (X86ScalarSSEf32) {
229 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
230 // f32 and f64 cases are Legal, f80 case is not
231 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
232 } else {
233 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
234 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
235 }
236 } else {
237 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
238 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
239 }
240
241 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
242 // this operation.
243 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
244 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
245
246 if (!Subtarget.useSoftFloat()) {
247 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
248 // are Legal, f80 is custom lowered.
249 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
250 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
251
252 if (X86ScalarSSEf32) {
253 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
254 // f32 and f64 cases are Legal, f80 case is not
255 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
256 } else {
257 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
258 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
259 }
260 } else {
261 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
262 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
263 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
264 }
265
266 // Handle FP_TO_UINT by promoting the destination to a larger signed
267 // conversion.
268 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
269 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
270 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
271
272 if (Subtarget.is64Bit()) {
273 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
274 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
275 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
276 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
277 } else {
278 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
279 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
280 }
281 } else if (!Subtarget.useSoftFloat()) {
282 // Since AVX is a superset of SSE3, only check for SSE here.
283 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
284 // Expand FP_TO_UINT into a select.
285 // FIXME: We would like to use a Custom expander here eventually to do
286 // the optimal thing for SSE vs. the default expansion in the legalizer.
287 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
288 else
289 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
290 // With SSE3 we can use fisttpll to convert to a signed i64; without
291 // SSE, we're stuck with a fistpll.
292 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
293
294 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
295 }
296
297 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
298 if (!X86ScalarSSEf64) {
299 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
300 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
301 if (Subtarget.is64Bit()) {
302 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
303 // Without SSE, i64->f64 goes through memory.
304 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
305 }
306 } else if (!Subtarget.is64Bit())
307 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
308
309 // Scalar integer divide and remainder are lowered to use operations that
310 // produce two results, to match the available instructions. This exposes
311 // the two-result form to trivial CSE, which is able to combine x/y and x%y
312 // into a single instruction.
313 //
314 // Scalar integer multiply-high is also lowered to use two-result
315 // operations, to match the available instructions. However, plain multiply
316 // (low) operations are left as Legal, as there are single-result
317 // instructions for this in x86. Using the two-result multiply instructions
318 // when both high and low results are needed must be arranged by dagcombine.
319 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
320 setOperationAction(ISD::MULHS, VT, Expand);
321 setOperationAction(ISD::MULHU, VT, Expand);
322 setOperationAction(ISD::SDIV, VT, Expand);
323 setOperationAction(ISD::UDIV, VT, Expand);
324 setOperationAction(ISD::SREM, VT, Expand);
325 setOperationAction(ISD::UREM, VT, Expand);
326 }
327
328 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
329 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
330 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
331 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
332 setOperationAction(ISD::BR_CC, VT, Expand);
333 setOperationAction(ISD::SELECT_CC, VT, Expand);
334 }
335 if (Subtarget.is64Bit())
336 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
337 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
338 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
339 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
340 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
341
342 setOperationAction(ISD::FREM , MVT::f32 , Expand);
343 setOperationAction(ISD::FREM , MVT::f64 , Expand);
344 setOperationAction(ISD::FREM , MVT::f80 , Expand);
345 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
346
347 // Promote the i8 variants and force them on up to i32 which has a shorter
348 // encoding.
349 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
350 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
351 if (!Subtarget.hasBMI()) {
352 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
353 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
354 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
355 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
356 if (Subtarget.is64Bit()) {
357 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
358 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
359 }
360 }
361
362 if (Subtarget.hasLZCNT()) {
363 // When promoting the i8 variants, force them to i32 for a shorter
364 // encoding.
365 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
366 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
367 } else {
368 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
369 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
370 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
371 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
372 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
373 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
374 if (Subtarget.is64Bit()) {
375 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
376 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
377 }
378 }
379
380 // Special handling for half-precision floating point conversions.
381 // If we don't have F16C support, then lower half float conversions
382 // into library calls.
383 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
384 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
385 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
386 }
387
388 // There's never any support for operations beyond MVT::f32.
389 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
390 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
391 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
392 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
393
394 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
395 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
396 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
397 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
398 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
399 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
400
401 if (Subtarget.hasPOPCNT()) {
402 setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
403 } else {
404 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
405 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
406 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
407 if (Subtarget.is64Bit())
408 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
409 }
410
411 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
412
413 if (!Subtarget.hasMOVBE())
414 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
415
416 // These should be promoted to a larger select which is supported.
417 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
418 // X86 wants to expand cmov itself.
419 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
420 setOperationAction(ISD::SELECT, VT, Custom);
421 setOperationAction(ISD::SETCC, VT, Custom);
422 }
423 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
424 if (VT == MVT::i64 && !Subtarget.is64Bit())
425 continue;
426 setOperationAction(ISD::SELECT, VT, Custom);
427 setOperationAction(ISD::SETCC, VT, Custom);
428 }
429
430 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
431 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
432 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
433
434 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
435 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
436 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
437 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
438 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
439 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
440 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
441 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
442
443 // Darwin ABI issue.
444 for (auto VT : { MVT::i32, MVT::i64 }) {
445 if (VT == MVT::i64 && !Subtarget.is64Bit())
446 continue;
447 setOperationAction(ISD::ConstantPool , VT, Custom);
448 setOperationAction(ISD::JumpTable , VT, Custom);
449 setOperationAction(ISD::GlobalAddress , VT, Custom);
450 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
451 setOperationAction(ISD::ExternalSymbol , VT, Custom);
452 setOperationAction(ISD::BlockAddress , VT, Custom);
453 }
454
455 // 64-bit shl, sra, srl (iff 32-bit x86)
456 for (auto VT : { MVT::i32, MVT::i64 }) {
457 if (VT == MVT::i64 && !Subtarget.is64Bit())
458 continue;
459 setOperationAction(ISD::SHL_PARTS, VT, Custom);
460 setOperationAction(ISD::SRA_PARTS, VT, Custom);
461 setOperationAction(ISD::SRL_PARTS, VT, Custom);
462 }
463
464 if (Subtarget.hasSSE1())
465 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
466
467 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
468
469 // Expand certain atomics
470 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
471 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
472 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
473 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
474 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
477 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
478 }
479
480 if (Subtarget.hasCmpxchg16b()) {
481 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
482 }
483
484 // FIXME - use subtarget debug flags
485 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
486 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
487 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
488 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
489 }
490
491 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
492 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
493
494 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
495 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
496
497 setOperationAction(ISD::TRAP, MVT::Other, Legal);
498 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
499
500 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
501 setOperationAction(ISD::VASTART , MVT::Other, Custom);
502 setOperationAction(ISD::VAEND , MVT::Other, Expand);
503 bool Is64Bit = Subtarget.is64Bit();
504 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
505 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
506
507 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
508 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
509
510 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
511
512 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
513 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
514 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
515
516 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
517 // f32 and f64 use SSE.
518 // Set up the FP register classes.
519 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
520 : &X86::FR32RegClass);
521 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
522 : &X86::FR64RegClass);
523
524 for (auto VT : { MVT::f32, MVT::f64 }) {
525 // Use ANDPD to simulate FABS.
526 setOperationAction(ISD::FABS, VT, Custom);
527
528 // Use XORP to simulate FNEG.
529 setOperationAction(ISD::FNEG, VT, Custom);
530
531 // Use ANDPD and ORPD to simulate FCOPYSIGN.
532 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
533
534 // We don't support sin/cos/fmod
535 setOperationAction(ISD::FSIN , VT, Expand);
536 setOperationAction(ISD::FCOS , VT, Expand);
537 setOperationAction(ISD::FSINCOS, VT, Expand);
538 }
539
540 // Lower this to MOVMSK plus an AND.
541 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
542 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
543
544 // Expand FP immediates into loads from the stack, except for the special
545 // cases we handle.
546 addLegalFPImmediate(APFloat(+0.0)); // xorpd
547 addLegalFPImmediate(APFloat(+0.0f)); // xorps
548 } else if (UseX87 && X86ScalarSSEf32) {
549 // Use SSE for f32, x87 for f64.
550 // Set up the FP register classes.
551 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
552 : &X86::FR32RegClass);
553 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
554
555 // Use ANDPS to simulate FABS.
556 setOperationAction(ISD::FABS , MVT::f32, Custom);
557
558 // Use XORP to simulate FNEG.
559 setOperationAction(ISD::FNEG , MVT::f32, Custom);
560
561 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
562
563 // Use ANDPS and ORPS to simulate FCOPYSIGN.
564 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
565 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
566
567 // We don't support sin/cos/fmod
568 setOperationAction(ISD::FSIN , MVT::f32, Expand);
569 setOperationAction(ISD::FCOS , MVT::f32, Expand);
570 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
571
572 // Special cases we handle for FP constants.
573 addLegalFPImmediate(APFloat(+0.0f)); // xorps
574 addLegalFPImmediate(APFloat(+0.0)); // FLD0
575 addLegalFPImmediate(APFloat(+1.0)); // FLD1
576 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
577 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
578
579 // Always expand sin/cos functions even though x87 has an instruction.
580 setOperationAction(ISD::FSIN , MVT::f64, Expand);
581 setOperationAction(ISD::FCOS , MVT::f64, Expand);
582 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
583 } else if (UseX87) {
584 // f32 and f64 in x87.
585 // Set up the FP register classes.
586 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
587 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
588
589 for (auto VT : { MVT::f32, MVT::f64 }) {
590 setOperationAction(ISD::UNDEF, VT, Expand);
591 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
592
593 // Always expand sin/cos functions even though x87 has an instruction.
594 setOperationAction(ISD::FSIN , VT, Expand);
595 setOperationAction(ISD::FCOS , VT, Expand);
596 setOperationAction(ISD::FSINCOS, VT, Expand);
597 }
598 addLegalFPImmediate(APFloat(+0.0)); // FLD0
599 addLegalFPImmediate(APFloat(+1.0)); // FLD1
600 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
601 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
602 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
603 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
604 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
605 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
606 }
607
608 // We don't support FMA.
609 setOperationAction(ISD::FMA, MVT::f64, Expand);
610 setOperationAction(ISD::FMA, MVT::f32, Expand);
611
612 // Long double always uses X87, except f128 in MMX.
613 if (UseX87) {
614 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
615 addRegisterClass(MVT::f128, &X86::FR128RegClass);
616 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
617 setOperationAction(ISD::FABS , MVT::f128, Custom);
618 setOperationAction(ISD::FNEG , MVT::f128, Custom);
619 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
620 }
621
622 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
623 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
624 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
625 {
626 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
627 addLegalFPImmediate(TmpFlt); // FLD0
628 TmpFlt.changeSign();
629 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
630
631 bool ignored;
632 APFloat TmpFlt2(+1.0);
633 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
634 &ignored);
635 addLegalFPImmediate(TmpFlt2); // FLD1
636 TmpFlt2.changeSign();
637 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
638 }
639
640 // Always expand sin/cos functions even though x87 has an instruction.
641 setOperationAction(ISD::FSIN , MVT::f80, Expand);
642 setOperationAction(ISD::FCOS , MVT::f80, Expand);
643 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
644
645 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
646 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
647 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
648 setOperationAction(ISD::FRINT, MVT::f80, Expand);
649 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
650 setOperationAction(ISD::FMA, MVT::f80, Expand);
651 }
652
653 // Always use a library call for pow.
654 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
655 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
656 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
657
658 setOperationAction(ISD::FLOG, MVT::f80, Expand);
659 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
660 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
661 setOperationAction(ISD::FEXP, MVT::f80, Expand);
662 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
663 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
664 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
665
666 // Some FP actions are always expanded for vector types.
667 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
668 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
669 setOperationAction(ISD::FSIN, VT, Expand);
670 setOperationAction(ISD::FSINCOS, VT, Expand);
671 setOperationAction(ISD::FCOS, VT, Expand);
672 setOperationAction(ISD::FREM, VT, Expand);
673 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
674 setOperationAction(ISD::FPOW, VT, Expand);
675 setOperationAction(ISD::FLOG, VT, Expand);
676 setOperationAction(ISD::FLOG2, VT, Expand);
677 setOperationAction(ISD::FLOG10, VT, Expand);
678 setOperationAction(ISD::FEXP, VT, Expand);
679 setOperationAction(ISD::FEXP2, VT, Expand);
680 }
681
682 // First set operation action for all vector types to either promote
683 // (for widening) or expand (for scalarization). Then we will selectively
684 // turn on ones that can be effectively codegen'd.
685 for (MVT VT : MVT::vector_valuetypes()) {
686 setOperationAction(ISD::SDIV, VT, Expand);
687 setOperationAction(ISD::UDIV, VT, Expand);
688 setOperationAction(ISD::SREM, VT, Expand);
689 setOperationAction(ISD::UREM, VT, Expand);
690 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
691 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
692 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
693 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
694 setOperationAction(ISD::FMA, VT, Expand);
695 setOperationAction(ISD::FFLOOR, VT, Expand);
696 setOperationAction(ISD::FCEIL, VT, Expand);
697 setOperationAction(ISD::FTRUNC, VT, Expand);
698 setOperationAction(ISD::FRINT, VT, Expand);
699 setOperationAction(ISD::FNEARBYINT, VT, Expand);
700 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
701 setOperationAction(ISD::MULHS, VT, Expand);
702 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
703 setOperationAction(ISD::MULHU, VT, Expand);
704 setOperationAction(ISD::SDIVREM, VT, Expand);
705 setOperationAction(ISD::UDIVREM, VT, Expand);
706 setOperationAction(ISD::CTPOP, VT, Expand);
707 setOperationAction(ISD::CTTZ, VT, Expand);
708 setOperationAction(ISD::CTLZ, VT, Expand);
709 setOperationAction(ISD::ROTL, VT, Expand);
710 setOperationAction(ISD::ROTR, VT, Expand);
711 setOperationAction(ISD::BSWAP, VT, Expand);
712 setOperationAction(ISD::SETCC, VT, Expand);
713 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
714 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
715 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
716 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
717 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
718 setOperationAction(ISD::TRUNCATE, VT, Expand);
719 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
720 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
721 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
722 setOperationAction(ISD::SELECT_CC, VT, Expand);
723 for (MVT InnerVT : MVT::vector_valuetypes()) {
724 setTruncStoreAction(InnerVT, VT, Expand);
725
726 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
727 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
728
729 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
730 // types, we have to deal with them whether we ask for Expansion or not.
731 // Setting Expand causes its own optimisation problems though, so leave
732 // them legal.
733 if (VT.getVectorElementType() == MVT::i1)
734 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
735
736 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
737 // split/scalarized right now.
738 if (VT.getVectorElementType() == MVT::f16)
739 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
740 }
741 }
742
743 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
744 // with -msoft-float, disable use of MMX as well.
745 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
746 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
747 // No operations on x86mmx supported, everything uses intrinsics.
748 }
749
750 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
751 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
752 : &X86::VR128RegClass);
753
754 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
755 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
756 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
757 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
758 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
759 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
760 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
761 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
762 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
763 }
764
765 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
766 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
767 : &X86::VR128RegClass);
768
769 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
770 // registers cannot be used even for integer operations.
771 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
772 : &X86::VR128RegClass);
773 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
774 : &X86::VR128RegClass);
775 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
776 : &X86::VR128RegClass);
777 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
778 : &X86::VR128RegClass);
779
780 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
781 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
782 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
783 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
784 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
785 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
786 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
787 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
788 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
789 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
790 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
791 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
792 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
793
794 setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
795 setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
796 setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
797 setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
798
799 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
800 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
801 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
802
803 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
804 setOperationAction(ISD::SETCC, VT, Custom);
805 setOperationAction(ISD::CTPOP, VT, Custom);
806 setOperationAction(ISD::CTTZ, VT, Custom);
807 }
808
809 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
810 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
811 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
812 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
813 setOperationAction(ISD::VSELECT, VT, Custom);
814 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
815 }
816
817 // We support custom legalizing of sext and anyext loads for specific
818 // memory vector types which we can load as a scalar (or sequence of
819 // scalars) and extend in-register to a legal 128-bit vector type. For sext
820 // loads these must work with a single scalar load.
821 for (MVT VT : MVT::integer_vector_valuetypes()) {
822 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
823 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
824 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
825 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
826 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
827 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
828 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
829 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
830 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
831 }
832
833 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
834 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
835 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
836 setOperationAction(ISD::VSELECT, VT, Custom);
837
838 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
839 continue;
840
841 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
842 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
843 }
844
845 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
846 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
847 setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
848 setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
849 setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
850 setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
851 setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
852 }
853
854 // Custom lower v2i64 and v2f64 selects.
855 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
856 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
857
858 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
859 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
860
861 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
862 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
863
864 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
865 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
866 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
867
868 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
869 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
870
871 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
872 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
873
874 for (MVT VT : MVT::fp_vector_valuetypes())
875 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
876
877 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
878 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
879 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
880
881 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
882 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
883 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
884
885 // In the customized shift lowering, the legal v4i32/v2i64 cases
886 // in AVX2 will be recognized.
887 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
888 setOperationAction(ISD::SRL, VT, Custom);
889 setOperationAction(ISD::SHL, VT, Custom);
890 setOperationAction(ISD::SRA, VT, Custom);
891 }
892 }
893
894 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
895 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
896 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
897 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
898 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
899 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
900 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
901 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
902 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
903 }
904
905 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
906 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
907 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
908 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
909 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
910 setOperationAction(ISD::FRINT, RoundedTy, Legal);
911 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
912 }
913
914 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
915 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
916 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
917 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
918 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
919 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
920 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
921 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
922
923 // FIXME: Do we need to handle scalar-to-vector here?
924 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
925
926 // We directly match byte blends in the backend as they match the VSELECT
927 // condition form.
928 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
929
930 // SSE41 brings specific instructions for doing vector sign extend even in
931 // cases where we don't have SRA.
932 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
933 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
934 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
935 }
936
937 for (MVT VT : MVT::integer_vector_valuetypes()) {
938 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
939 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
940 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
941 }
942
943 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
944 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
945 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
946 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
947 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
948 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
949 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
950 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
951 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
952 }
953
954 // i8 vectors are custom because the source register and source
955 // memory operand types are not the same width.
956 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
957 }
958
959 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
960 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
961 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
962 setOperationAction(ISD::ROTL, VT, Custom);
963
964 // XOP can efficiently perform BITREVERSE with VPPERM.
965 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
966 setOperationAction(ISD::BITREVERSE, VT, Custom);
967
968 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
969 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
970 setOperationAction(ISD::BITREVERSE, VT, Custom);
971 }
972
973 // Special handling for masked gather of 2 elements
974 if (Subtarget.hasAVX2() && !Subtarget.hasAVX512())
975 setOperationAction(ISD::MGATHER, MVT::v2i64, Custom);
976
977 if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
978 bool HasInt256 = Subtarget.hasInt256();
979
980 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
981 : &X86::VR256RegClass);
982 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
983 : &X86::VR256RegClass);
984 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
985 : &X86::VR256RegClass);
986 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
987 : &X86::VR256RegClass);
988 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
989 : &X86::VR256RegClass);
990 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
991 : &X86::VR256RegClass);
992
993 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
994 setOperationAction(ISD::FFLOOR, VT, Legal);
995 setOperationAction(ISD::FCEIL, VT, Legal);
996 setOperationAction(ISD::FTRUNC, VT, Legal);
997 setOperationAction(ISD::FRINT, VT, Legal);
998 setOperationAction(ISD::FNEARBYINT, VT, Legal);
999 setOperationAction(ISD::FNEG, VT, Custom);
1000 setOperationAction(ISD::FABS, VT, Custom);
1001 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1002 }
1003
1004 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1005 // even though v8i16 is a legal type.
1006 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
1007 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
1008 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1009
1010 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
1011 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1012 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1013
1014 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1015 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1016
1017 for (MVT VT : MVT::fp_vector_valuetypes())
1018 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1019
1020 // In the customized shift lowering, the legal v8i32/v4i64 cases
1021 // in AVX2 will be recognized.
1022 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1023 setOperationAction(ISD::SRL, VT, Custom);
1024 setOperationAction(ISD::SHL, VT, Custom);
1025 setOperationAction(ISD::SRA, VT, Custom);
1026 }
1027
1028 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1029 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1030 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1031
1032 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1033 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1034 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1035 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1036 }
1037
1038 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1039 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1040 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1041 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1042
1043 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1044 setOperationAction(ISD::SETCC, VT, Custom);
1045 setOperationAction(ISD::CTPOP, VT, Custom);
1046 setOperationAction(ISD::CTTZ, VT, Custom);
1047 setOperationAction(ISD::CTLZ, VT, Custom);
1048 }
1049
1050 if (Subtarget.hasAnyFMA()) {
1051 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1052 MVT::v2f64, MVT::v4f64 })
1053 setOperationAction(ISD::FMA, VT, Legal);
1054 }
1055
1056 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1057 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1058 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1059 }
1060
1061 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1062 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1063 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1064 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1065
1066 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1067 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1068
1069 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1070 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1071 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1072 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1073
1074 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1075 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1076 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1077 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1078 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1079 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1080 }
1081
1082 if (HasInt256) {
1083 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
1084 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
1085 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
1086
1087 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1088 // when we have a 256bit-wide blend with immediate.
1089 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1090
1091 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1092 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1093 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1094 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1095 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1096 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1097 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1098 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1099 }
1100 }
1101
1102 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1103 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1104 setOperationAction(ISD::MLOAD, VT, Legal);
1105 setOperationAction(ISD::MSTORE, VT, Legal);
1106 }
1107
1108 // Extract subvector is special because the value type
1109 // (result) is 128-bit but the source is 256-bit wide.
1110 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1111 MVT::v4f32, MVT::v2f64 }) {
1112 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1113 }
1114
1115 // Custom lower several nodes for 256-bit types.
1116 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1117 MVT::v8f32, MVT::v4f64 }) {
1118 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1119 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1120 setOperationAction(ISD::VSELECT, VT, Custom);
1121 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1122 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1123 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1124 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1125 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1126 }
1127
1128 if (HasInt256)
1129 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1130
1131 // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
1132 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1133 setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
1134 setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
1135 setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
1136 setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
1137 setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
1138 }
1139 }
1140
1141 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1142 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1143 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1144 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1145 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1146
1147 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1148 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1149 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1150
1151 for (MVT VT : MVT::fp_vector_valuetypes())
1152 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1153
1154 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1155 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1156 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1157 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1158 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1159 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1160 }
1161
1162 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
1163 MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
1164 MVT::v8i64, MVT::v32i16, MVT::v64i8}) {
1165 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
1166 setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
1167 setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
1168 setLoadExtAction(ISD::EXTLOAD, VT, MaskVT, Custom);
1169 setTruncStoreAction(VT, MaskVT, Custom);
1170 }
1171
1172 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1173 setOperationAction(ISD::FNEG, VT, Custom);
1174 setOperationAction(ISD::FABS, VT, Custom);
1175 setOperationAction(ISD::FMA, VT, Legal);
1176 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1177 }
1178
1179 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1180 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1181 setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Legal);
1182 setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Legal);
1183 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1184 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1185 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1186 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1187 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1188 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1189 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1190 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1191 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1192 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1193 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1194 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
1195 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
1196 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1197 setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
1198 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1199 setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
1200 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
1201 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
1202 setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
1203 setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
1204 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1205 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1206
1207 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1208 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1209 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1210 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1211 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1212 if (Subtarget.hasVLX()){
1213 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1214 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1215 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1216 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1217 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1218
1219 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1220 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1221 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1222 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1223 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1224 } else {
1225 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1226 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1227 setOperationAction(ISD::MLOAD, VT, Custom);
1228 setOperationAction(ISD::MSTORE, VT, Custom);
1229 }
1230 }
1231
1232 if (Subtarget.hasDQI()) {
1233 for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) {
1234 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1235 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1236 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1237 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1238 }
1239 if (Subtarget.hasVLX()) {
1240 // Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
1241 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1242 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1243 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1244 }
1245 }
1246 if (Subtarget.hasVLX()) {
1247 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1248 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1249 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1250 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1251 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1252 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1253 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1254 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
1255 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
1256 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1257 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1258 }
1259
1260 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1261 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1262 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1263 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1264 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1265 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1266 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1267 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1268 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1269 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1270 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1271
1272 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1273 setOperationAction(ISD::FFLOOR, VT, Legal);
1274 setOperationAction(ISD::FCEIL, VT, Legal);
1275 setOperationAction(ISD::FTRUNC, VT, Legal);
1276 setOperationAction(ISD::FRINT, VT, Legal);
1277 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1278 }
1279
1280 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
1281 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
1282
1283 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1284 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1285 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1286
1287 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1288 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1289 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1290 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1291 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1292
1293 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1294 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1295
1296 setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom);
1297 setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom);
1298
1299 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1300 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1301 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1302 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1303 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1304 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1305
1306
1307 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1308 setOperationAction(ISD::ABS, MVT::v4i64, Legal);
1309 setOperationAction(ISD::ABS, MVT::v2i64, Legal);
1310
1311 for (auto VT : { MVT::v8i1, MVT::v16i1 }) {
1312 setOperationAction(ISD::ADD, VT, Custom);
1313 setOperationAction(ISD::SUB, VT, Custom);
1314 setOperationAction(ISD::MUL, VT, Custom);
1315 setOperationAction(ISD::SETCC, VT, Custom);
1316 setOperationAction(ISD::SELECT, VT, Custom);
1317 setOperationAction(ISD::TRUNCATE, VT, Custom);
1318
1319 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1320 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1321 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1322 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1323 setOperationAction(ISD::VSELECT, VT, Expand);
1324 }
1325
1326 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1327 setOperationAction(ISD::SMAX, VT, Legal);
1328 setOperationAction(ISD::UMAX, VT, Legal);
1329 setOperationAction(ISD::SMIN, VT, Legal);
1330 setOperationAction(ISD::UMIN, VT, Legal);
1331 setOperationAction(ISD::ABS, VT, Legal);
1332 setOperationAction(ISD::SRL, VT, Custom);
1333 setOperationAction(ISD::SHL, VT, Custom);
1334 setOperationAction(ISD::SRA, VT, Custom);
1335 setOperationAction(ISD::CTPOP, VT, Custom);
1336 setOperationAction(ISD::CTTZ, VT, Custom);
1337 }
1338
1339 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1340 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64,
1341 MVT::v8i64}) {
1342 setOperationAction(ISD::ROTL, VT, Custom);
1343 setOperationAction(ISD::ROTR, VT, Custom);
1344 }
1345
1346 // Need to promote to 64-bit even though we have 32-bit masked instructions
1347 // because the IR optimizers rearrange bitcasts around logic ops leaving
1348 // too many variations to handle if we don't promote them.
1349 setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
1350 setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
1351 setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
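
// A standalone sketch (illustration only) of why the promotion above is safe:
// bitwise AND/OR/XOR give the same bits whether a register is viewed as 32-bit
// or 64-bit lanes, so the v16i32 forms can run on the v8i64 bitcast. Shown here
// on a single 64-bit value split into two 32-bit "lanes"; nothing below is LLVM API.
#include <cstdint>

static uint64_t andAsTwo32BitLanes(uint64_t A, uint64_t B) {
  uint32_t Lo = static_cast<uint32_t>(A) & static_cast<uint32_t>(B);
  uint32_t Hi = static_cast<uint32_t>(A >> 32) & static_cast<uint32_t>(B >> 32);
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}
// For every A and B, andAsTwo32BitLanes(A, B) == (A & B).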
1352
1353 if (Subtarget.hasCDI()) {
1354 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1355 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
1356 MVT::v4i64, MVT::v8i64}) {
1357 setOperationAction(ISD::CTLZ, VT, Legal);
1358 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
1359 }
1360 } // Subtarget.hasCDI()
1361
1362 if (Subtarget.hasDQI()) {
1363 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1364 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1365 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1366 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1367 }
1368
1369 if (Subtarget.hasVPOPCNTDQ()) {
1370 // VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512
1371 // version of popcntd/q.
1372 for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64,
1373 MVT::v4i32, MVT::v2i64})
1374 setOperationAction(ISD::CTPOP, VT, Legal);
1375 }
1376
1377 // Custom legalize 2x32 to get a little better code.
1378 if (Subtarget.hasVLX()) {
1379 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1380 }
1381
1382 // Custom lower several nodes.
1383 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1384 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1385 setOperationAction(ISD::MGATHER, VT, Custom);
1386 setOperationAction(ISD::MSCATTER, VT, Custom);
1387 }
1388
1389 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v1i1, Legal);
1390
1391 // Extract subvector is special because the value type
1392 // (result) is 256-bit but the source is 512-bit wide.
1393 // 128-bit was made Legal under AVX1.
1394 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1395 MVT::v8f32, MVT::v4f64 })
1396 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1397 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
1398 MVT::v16i1, MVT::v32i1, MVT::v64i1 })
1399 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1400
1401 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1402 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1403 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1404 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1405 setOperationAction(ISD::VSELECT, VT, Custom);
1406 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1407 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1408 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1409 setOperationAction(ISD::MLOAD, VT, Legal);
1410 setOperationAction(ISD::MSTORE, VT, Legal);
1411 setOperationAction(ISD::MGATHER, VT, Legal);
1412 setOperationAction(ISD::MSCATTER, VT, Custom);
1413 }
1414 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1415 setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
1416 setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
1417 }
1418 }// has AVX-512
1419
1420 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1421 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1422 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1423
1424 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1425 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1426
1427 setOperationAction(ISD::ADD, MVT::v32i1, Custom);
1428 setOperationAction(ISD::ADD, MVT::v64i1, Custom);
1429 setOperationAction(ISD::SUB, MVT::v32i1, Custom);
1430 setOperationAction(ISD::SUB, MVT::v64i1, Custom);
1431 setOperationAction(ISD::MUL, MVT::v32i1, Custom);
1432 setOperationAction(ISD::MUL, MVT::v64i1, Custom);
1433
1434 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1435 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1436 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1437 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1438 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1439 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1440 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1441 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1442 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1443 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1444 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1445 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1446 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1447 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1448 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1449 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1450 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1451 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1452 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom);
1453 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i1, Custom);
1454 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1455 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1456 setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
1457 setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
1458 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1459 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1460 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1461 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1462 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1463 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1464 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1465 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1466 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1467 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
1468 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
1469 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1470 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1471 setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
1472 setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
1473 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1474 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
1475 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
1476 setOperationAction(ISD::BUILD_VECTOR, MVT::v32i1, Custom);
1477 setOperationAction(ISD::BUILD_VECTOR, MVT::v64i1, Custom);
1478 setOperationAction(ISD::VSELECT, MVT::v32i1, Expand);
1479 setOperationAction(ISD::VSELECT, MVT::v64i1, Expand);
1480 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1481
1482 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1483
1484 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1485 if (Subtarget.hasVLX()) {
1486 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1487 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1488 }
1489
1490 LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom;
1491 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1492 setOperationAction(ISD::MLOAD, VT, Action);
1493 setOperationAction(ISD::MSTORE, VT, Action);
1494 }
1495
1496 if (Subtarget.hasCDI()) {
1497 setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
1498 setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
1499 }
1500
1501 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1502 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1503 setOperationAction(ISD::VSELECT, VT, Custom);
1504 setOperationAction(ISD::ABS, VT, Legal);
1505 setOperationAction(ISD::SRL, VT, Custom);
1506 setOperationAction(ISD::SHL, VT, Custom);
1507 setOperationAction(ISD::SRA, VT, Custom);
1508 setOperationAction(ISD::MLOAD, VT, Legal);
1509 setOperationAction(ISD::MSTORE, VT, Legal);
1510 setOperationAction(ISD::CTPOP, VT, Custom);
1511 setOperationAction(ISD::CTTZ, VT, Custom);
1512 setOperationAction(ISD::SMAX, VT, Legal);
1513 setOperationAction(ISD::UMAX, VT, Legal);
1514 setOperationAction(ISD::SMIN, VT, Legal);
1515 setOperationAction(ISD::UMIN, VT, Legal);
1516
1517 setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
1518 setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
1519 setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
1520 }
1521
1522 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1523 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1524 }
1525 }
1526
1527 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1528 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1529 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1530
1531 for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
1532 setOperationAction(ISD::ADD, VT, Custom);
1533 setOperationAction(ISD::SUB, VT, Custom);
1534 setOperationAction(ISD::MUL, VT, Custom);
1535 setOperationAction(ISD::VSELECT, VT, Expand);
1536
1537 setOperationAction(ISD::TRUNCATE, VT, Custom);
1538 setOperationAction(ISD::SETCC, VT, Custom);
1539 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1540 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1541 setOperationAction(ISD::SELECT, VT, Custom);
1542 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1543 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1544 }
1545
1546 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1547 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1548 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1549 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1550
1551 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1552 setOperationAction(ISD::SMAX, VT, Legal);
1553 setOperationAction(ISD::UMAX, VT, Legal);
1554 setOperationAction(ISD::SMIN, VT, Legal);
1555 setOperationAction(ISD::UMIN, VT, Legal);
1556 }
1557 }
1558
1559 if (Subtarget.hasBITALG())
1560 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v32i8,
1561 MVT::v16i16, MVT::v16i8, MVT::v8i16 })
1562 setOperationAction(ISD::CTPOP, VT, Legal);
1563
1564 // We want to custom lower some of our intrinsics.
1565 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1566 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1567 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1568 if (!Subtarget.is64Bit()) {
1569 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1570 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1571 }
1572
1573 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1574 // handle type legalization for these operations here.
1575 //
1576 // FIXME: We really should do custom legalization for addition and
1577 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1578 // than generic legalization for 64-bit multiplication-with-overflow, though.
1579 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1580 if (VT == MVT::i64 && !Subtarget.is64Bit())
1581 continue;
1582 // Add/Sub/Mul with overflow operations are custom lowered.
1583 setOperationAction(ISD::SADDO, VT, Custom);
1584 setOperationAction(ISD::UADDO, VT, Custom);
1585 setOperationAction(ISD::SSUBO, VT, Custom);
1586 setOperationAction(ISD::USUBO, VT, Custom);
1587 setOperationAction(ISD::SMULO, VT, Custom);
1588 setOperationAction(ISD::UMULO, VT, Custom);
1589
1590 // Support carry in as value rather than glue.
1591 setOperationAction(ISD::ADDCARRY, VT, Custom);
1592 setOperationAction(ISD::SUBCARRY, VT, Custom);
1593 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1594 }
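
// A standalone sketch (illustration only) of what the UADDO and ADDCARRY nodes
// compute: an add whose carry-out is produced as an ordinary value rather than
// as glue, which is what the custom lowering above maps onto x86 ADD/ADC.
// Names below are made up for the example.
#include <cstdint>

struct AddWithCarry { uint32_t Sum; bool CarryOut; };

static AddWithCarry uaddo(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;
  return {Sum, Sum < A};                     // unsigned overflow check
}

static AddWithCarry addcarry(uint32_t A, uint32_t B, bool CarryIn) {
  AddWithCarry R = uaddo(A, B);
  uint32_t Sum = R.Sum + (CarryIn ? 1u : 0u);
  return {Sum, R.CarryOut || Sum < R.Sum};   // like ADC: propagate carry-in
}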
1595
1596 if (!Subtarget.is64Bit()) {
1597 // These libcalls are not available in 32-bit.
1598 setLibcallName(RTLIB::SHL_I128, nullptr);
1599 setLibcallName(RTLIB::SRL_I128, nullptr);
1600 setLibcallName(RTLIB::SRA_I128, nullptr);
1601 setLibcallName(RTLIB::MUL_I128, nullptr);
1602 }
1603
1604 // Combine sin / cos into one node or libcall if possible.
1605 if (Subtarget.hasSinCos()) {
1606 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1607 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1608 if (Subtarget.isTargetDarwin()) {
1609 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1610 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1611 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1612 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1613 }
1614 }
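
// A standalone sketch (illustration only) of the shape of code this targets:
// both sin(x) and cos(x) of the same operand. On hasSinCos() subtargets the
// two FSIN/FCOS computations may be merged into one sincos-style libcall
// (__sincos_stret on Darwin) when the optimizer can prove that is safe.
#include <cmath>

static void sinAndCos(double X, double &S, double &C) {
  S = std::sin(X);
  C = std::cos(X);
}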
1615
1616 if (Subtarget.isTargetWin64()) {
1617 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1618 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1619 setOperationAction(ISD::SREM, MVT::i128, Custom);
1620 setOperationAction(ISD::UREM, MVT::i128, Custom);
1621 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1622 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1623 }
1624
1625 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1626 // is. We should promote the value to 64-bits to solve this.
1627 // This is what the CRT headers do - `fmodf` is an inline header
1628 // function casting to f64 and calling `fmod`.
1629 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1630 Subtarget.isTargetWindowsItanium()))
1631 for (ISD::NodeType Op :
1632 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1633 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1634 if (isOperationExpand(Op, MVT::f32))
1635 setOperationAction(Op, MVT::f32, Promote);
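
// A standalone sketch (illustration only) of the Promote action chosen above,
// mirroring what the MSVC CRT headers do for fmodf: perform the f32 operation
// through the f64 libcall and narrow the result.
#include <cmath>

static float fmodfViaFmod(float X, float Y) {
  return static_cast<float>(std::fmod(static_cast<double>(X),
                                      static_cast<double>(Y)));
}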
1636
1637 // We have target-specific dag combine patterns for the following nodes:
1638 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1639 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1640 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1641 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1642 setTargetDAGCombine(ISD::BITCAST);
1643 setTargetDAGCombine(ISD::VSELECT);
1644 setTargetDAGCombine(ISD::SELECT);
1645 setTargetDAGCombine(ISD::SHL);
1646 setTargetDAGCombine(ISD::SRA);
1647 setTargetDAGCombine(ISD::SRL);
1648 setTargetDAGCombine(ISD::OR);
1649 setTargetDAGCombine(ISD::AND);
1650 setTargetDAGCombine(ISD::ADD);
1651 setTargetDAGCombine(ISD::FADD);
1652 setTargetDAGCombine(ISD::FSUB);
1653 setTargetDAGCombine(ISD::FNEG);
1654 setTargetDAGCombine(ISD::FMA);
1655 setTargetDAGCombine(ISD::FMINNUM);
1656 setTargetDAGCombine(ISD::FMAXNUM);
1657 setTargetDAGCombine(ISD::SUB);
1658 setTargetDAGCombine(ISD::LOAD);
1659 setTargetDAGCombine(ISD::MLOAD);
1660 setTargetDAGCombine(ISD::STORE);
1661 setTargetDAGCombine(ISD::MSTORE);
1662 setTargetDAGCombine(ISD::TRUNCATE);
1663 setTargetDAGCombine(ISD::ZERO_EXTEND);
1664 setTargetDAGCombine(ISD::ANY_EXTEND);
1665 setTargetDAGCombine(ISD::SIGN_EXTEND);
1666 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1667 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1668 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1669 setTargetDAGCombine(ISD::SINT_TO_FP);
1670 setTargetDAGCombine(ISD::UINT_TO_FP);
1671 setTargetDAGCombine(ISD::SETCC);
1672 setTargetDAGCombine(ISD::MUL);
1673 setTargetDAGCombine(ISD::XOR);
1674 setTargetDAGCombine(ISD::MSCATTER);
1675 setTargetDAGCombine(ISD::MGATHER);
1676
1677 computeRegisterProperties(Subtarget.getRegisterInfo());
1678
1679 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1680 MaxStoresPerMemsetOptSize = 8;
1681 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1682 MaxStoresPerMemcpyOptSize = 4;
1683 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1684 MaxStoresPerMemmoveOptSize = 4;
1685
1686 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1687 // that needs to be benchmarked and balanced with the potential use of vector
1688 // load/store types (PR33329, PR33914).
1689 MaxLoadsPerMemcmp = 2;
1690 MaxLoadsPerMemcmpOptSize = 2;
1691
1692 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1693 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1694
1695 // An out-of-order CPU can speculatively execute past a predictable branch,
1696 // but a conditional move could be stalled by an expensive earlier operation.
1697 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1698 EnableExtLdPromotion = true;
1699 setPrefFunctionAlignment(4); // 2^4 bytes.
1700
1701 verifyIntrinsicTables();
1702}
1703
1704// This has so far only been implemented for 64-bit MachO.
1705bool X86TargetLowering::useLoadStackGuardNode() const {
1706 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1707}
1708
1709TargetLoweringBase::LegalizeTypeAction
1710X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1711 if (ExperimentalVectorWideningLegalization &&
1712 VT.getVectorNumElements() != 1 &&
1713 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1714 return TypeWidenVector;
1715
1716 return TargetLoweringBase::getPreferredVectorAction(VT);
1717}
1718
1719EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1720 LLVMContext& Context,
1721 EVT VT) const {
1722 if (!VT.isVector())
1723 return MVT::i8;
1724
1725 if (VT.isSimple()) {
1726 MVT VVT = VT.getSimpleVT();
1727 const unsigned NumElts = VVT.getVectorNumElements();
1728 MVT EltVT = VVT.getVectorElementType();
1729 if (VVT.is512BitVector()) {
1730 if (Subtarget.hasAVX512())
1731 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1732 EltVT == MVT::f32 || EltVT == MVT::f64)
1733 switch(NumElts) {
1734 case 8: return MVT::v8i1;
1735 case 16: return MVT::v16i1;
1736 }
1737 if (Subtarget.hasBWI())
1738 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1739 switch(NumElts) {
1740 case 32: return MVT::v32i1;
1741 case 64: return MVT::v64i1;
1742 }
1743 }
1744
1745 if (Subtarget.hasBWI() && Subtarget.hasVLX())
1746 return MVT::getVectorVT(MVT::i1, NumElts);
1747
1748 if (!isTypeLegal(VT) && getTypeAction(Context, VT) == TypePromoteInteger) {
1749 EVT LegalVT = getTypeToTransformTo(Context, VT);
1750 EltVT = LegalVT.getVectorElementType().getSimpleVT();
1751 }
1752
1753 if (Subtarget.hasVLX() && EltVT.getSizeInBits() >= 32)
1754 switch(NumElts) {
1755 case 2: return MVT::v2i1;
1756 case 4: return MVT::v4i1;
1757 case 8: return MVT::v8i1;
1758 }
1759 }
1760
1761 return VT.changeVectorElementTypeToInteger();
1762}
1763
1764/// Helper for getByValTypeAlignment to determine
1765/// the desired ByVal argument alignment.
1766static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1767 if (MaxAlign == 16)
1768 return;
1769 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1770 if (VTy->getBitWidth() == 128)
1771 MaxAlign = 16;
1772 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1773 unsigned EltAlign = 0;
1774 getMaxByValAlign(ATy->getElementType(), EltAlign);
1775 if (EltAlign > MaxAlign)
1776 MaxAlign = EltAlign;
1777 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1778 for (auto *EltTy : STy->elements()) {
1779 unsigned EltAlign = 0;
1780 getMaxByValAlign(EltTy, EltAlign);
1781 if (EltAlign > MaxAlign)
1782 MaxAlign = EltAlign;
1783 if (MaxAlign == 16)
1784 break;
1785 }
1786 }
1787}
1788
1789/// Return the desired alignment for ByVal aggregate
1790/// function arguments in the caller parameter area. For X86, aggregates
1791/// that contain SSE vectors are placed at 16-byte boundaries while the rest
1792/// are at 4-byte boundaries.
1793unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1794 const DataLayout &DL) const {
1795 if (Subtarget.is64Bit()) {
1796 // Max of 8 and alignment of type.
1797 unsigned TyAlign = DL.getABITypeAlignment(Ty);
1798 if (TyAlign > 8)
1799 return TyAlign;
1800 return 8;
1801 }
1802
1803 unsigned Align = 4;
1804 if (Subtarget.hasSSE1())
1805 getMaxByValAlign(Ty, Align);
1806 return Align;
1807}
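
// A standalone sketch (illustration only) of the rule documented above, with a
// flattened description standing in for llvm::Type: 64-bit targets take
// max(8, ABI alignment); 32-bit targets take 16 only when SSE1 is available and
// the aggregate (recursively) contains a 128-bit vector, otherwise 4.
static unsigned byValAlignSketch(bool Is64Bit, unsigned ABITypeAlign,
                                 bool HasSSE1, bool Contains128BitVector) {
  if (Is64Bit)
    return ABITypeAlign > 8 ? ABITypeAlign : 8;
  if (HasSSE1 && Contains128BitVector)
    return 16;
  return 4;
}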
1808
1809/// Returns the target specific optimal type for load
1810/// and store operations as a result of memset, memcpy, and memmove
1811 /// lowering. If DstAlign is zero, it means the destination alignment can
1812 /// satisfy any constraint. Similarly, if SrcAlign is zero it means there is
1813 /// no need to check it against the alignment requirement,
1814/// probably because the source does not need to be loaded. If 'IsMemset' is
1815/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1816/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1817/// source is constant so it does not need to be loaded.
1818/// It returns EVT::Other if the type should be determined using generic
1819/// target-independent logic.
1820EVT
1821X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1822 unsigned DstAlign, unsigned SrcAlign,
1823 bool IsMemset, bool ZeroMemset,
1824 bool MemcpyStrSrc,
1825 MachineFunction &MF) const {
1826 const Function *F = MF.getFunction();
1827 if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
1828 if (Size >= 16 &&
1829 (!Subtarget.isUnalignedMem16Slow() ||
1830 ((DstAlign == 0 || DstAlign >= 16) &&
1831 (SrcAlign == 0 || SrcAlign >= 16)))) {
1832 // FIXME: Check if unaligned 32-byte accesses are slow.
1833 if (Size >= 32 && Subtarget.hasAVX()) {
1834 // Although this isn't a well-supported type for AVX1, we'll let
1835 // legalization and shuffle lowering produce the optimal codegen. If we
1836 // choose an optimal type with a vector element larger than a byte,
1837 // getMemsetStores() may create an intermediate splat (using an integer
1838 // multiply) before we splat as a vector.
1839 return MVT::v32i8;
1840 }
1841 if (Subtarget.hasSSE2())
1842 return MVT::v16i8;
1843 // TODO: Can SSE1 handle a byte vector?
1844 if (Subtarget.hasSSE1())
1845 return MVT::v4f32;
1846 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
1847 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
1848 // Do not use f64 to lower memcpy if source is string constant. It's
1849 // better to use i32 to avoid the loads.
1850 // Also, do not use f64 to lower memset unless this is a memset of zeros.
1851 // The gymnastics of splatting a byte value into an XMM register and then
1852 // only using 8-byte stores (because this is a CPU with slow unaligned
1853 // 16-byte accesses) makes that a loser.
1854 return MVT::f64;
1855 }
1856 }
1857 // This is a compromise. If we reach here, unaligned accesses may be slow on
1858 // this target. However, creating smaller, aligned accesses could be even
1859 // slower and would certainly be a lot more code.
1860 if (Subtarget.is64Bit() && Size >= 8)
1861 return MVT::i64;
1862 return MVT::i32;
1863}
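
// A standalone sketch (illustration only) collapsing the decision tree above to
// the access widths it picks; the real hook also weighs NoImplicitFloat, the
// memset-of-zero and constant-string-source cases, and returns MVT values.
static unsigned memOpWidthBitsSketch(unsigned long long Size, bool HasAVX,
                                     bool HasSSE2, bool HasSSE1,
                                     bool UnalignedOrAligned16OK, bool Is64Bit) {
  if (Size >= 16 && UnalignedOrAligned16OK) {
    if (Size >= 32 && HasAVX)
      return 256;                        // v32i8
    if (HasSSE2)
      return 128;                        // v16i8
    if (HasSSE1)
      return 128;                        // v4f32
  }
  if (Is64Bit && Size >= 8)
    return 64;                           // i64
  return 32;                             // i32
}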
1864
1865bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1866 if (VT == MVT::f32)
1867 return X86ScalarSSEf32;
1868 else if (VT == MVT::f64)
1869 return X86ScalarSSEf64;
1870 return true;
1871}
1872
1873bool
1874X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1875 unsigned,
1876 unsigned,
1877 bool *Fast) const {
1878 if (Fast) {
1879 switch (VT.getSizeInBits()) {
1880 default:
1881 // 8-byte and under are always assumed to be fast.
1882 *Fast = true;
1883 break;
1884 case 128:
1885 *Fast = !Subtarget.isUnalignedMem16Slow();
1886 break;
1887 case 256:
1888 *Fast = !Subtarget.isUnalignedMem32Slow();
1889 break;
1890 // TODO: What about AVX-512 (512-bit) accesses?
1891 }
1892 }
1893 // Misaligned accesses of any size are always allowed.
1894 return true;
1895}
1896
1897/// Return the entry encoding for a jump table in the
1898/// current function. The returned value is a member of the
1899/// MachineJumpTableInfo::JTEntryKind enum.
1900unsigned X86TargetLowering::getJumpTableEncoding() const {
1901 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1902 // symbol.
1903 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
1904 return MachineJumpTableInfo::EK_Custom32;
1905
1906 // Otherwise, use the normal jump table encoding heuristics.
1907 return TargetLowering::getJumpTableEncoding();
1908}
1909
1910bool X86TargetLowering::useSoftFloat() const {
1911 return Subtarget.useSoftFloat();
1912}
1913
1914void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
1915 ArgListTy &Args) const {
1916
1917 // Only relabel X86-32 for C / Stdcall CCs.
1918 if (Subtarget.is64Bit())
1919 return;
1920 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
1921 return;
1922 unsigned ParamRegs = 0;
1923 if (auto *M = MF->getFunction()->getParent())
1924 ParamRegs = M->getNumberRegisterParameters();
1925
1926 // Mark the first N integer arguments as being passed in registers.
1927 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
1928 Type *T = Args[Idx].Ty;
1929 if (T->isPointerTy() || T->isIntegerTy())
1930 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
1931 unsigned numRegs = 1;
1932 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
1933 numRegs = 2;
1934 if (ParamRegs < numRegs)
1935 return;
1936 ParamRegs -= numRegs;
1937 Args[Idx].IsInReg = true;
1938 }
1939 }
1940}
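
// A standalone sketch (illustration only) of the register-budget walk above,
// with argument sizes in bytes standing in for the IR types: each integer or
// pointer argument of at most 8 bytes takes one 32-bit register (two if wider
// than 4 bytes) until the module's register-parameter budget is used up.
#include <cstddef>
#include <vector>

static std::vector<bool> markInRegSketch(const std::vector<unsigned> &ArgBytes,
                                         unsigned ParamRegs) {
  std::vector<bool> InReg(ArgBytes.size(), false);
  for (std::size_t I = 0; I < ArgBytes.size(); ++I) {
    if (ArgBytes[I] > 8)
      continue;                          // not an integer/pointer-sized argument
    unsigned NumRegs = ArgBytes[I] > 4 ? 2 : 1;
    if (ParamRegs < NumRegs)
      break;                             // budget exhausted (early return above)
    ParamRegs -= NumRegs;
    InReg[I] = true;
  }
  return InReg;
}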
1941
1942const MCExpr *
1943X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1944 const MachineBasicBlock *MBB,
1945 unsigned uid,MCContext &Ctx) const{
1946   assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
1947 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
1948 // entries.
1949 return MCSymbolRefExpr::create(MBB->getSymbol(),
1950 MCSymbolRefExpr::VK_GOTOFF, Ctx);
1951}
1952
1953/// Returns relocation base for the given PIC jumptable.
1954SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
1955 SelectionDAG &DAG) const {
1956 if (!Subtarget.is64Bit())
1957 // This doesn't have SDLoc associated with it, but is not really the
1958 // same as a Register.
1959 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
1960 getPointerTy(DAG.getDataLayout()));
1961 return Table;
1962}
1963
1964/// This returns the relocation base for the given PIC jumptable,
1965/// the same as getPICJumpTableRelocBase, but as an MCExpr.
1966const MCExpr *X86TargetLowering::
1967getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
1968 MCContext &Ctx) const {
1969 // X86-64 uses RIP relative addressing based on the jump table label.
1970 if (Subtarget.isPICStyleRIPRel())
1971 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
1972
1973 // Otherwise, the reference is relative to the PIC base.
1974 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
1975}
1976
1977std::pair<const TargetRegisterClass *, uint8_t>
1978X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1979 MVT VT) const {
1980 const TargetRegisterClass *RRC = nullptr;
1981 uint8_t Cost = 1;
1982 switch (VT.SimpleTy) {
1983 default:
1984 return TargetLowering::findRepresentativeClass(TRI, VT);
1985 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
1986 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
1987 break;
1988 case MVT::x86mmx:
1989 RRC = &X86::VR64RegClass;
1990 break;
1991 case MVT::f32: case MVT::f64:
1992 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1993 case MVT::v4f32: case MVT::v2f64:
1994 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
1995 case MVT::v8f32: case MVT::v4f64:
1996 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
1997 case MVT::v16f32: case MVT::v8f64:
1998 RRC = &X86::VR128XRegClass;
1999 break;
2000 }
2001 return std::make_pair(RRC, Cost);
2002}
2003
2004unsigned X86TargetLowering::getAddressSpace() const {
2005 if (Subtarget.is64Bit())
2006 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2007 return 256;
2008}
2009
2010static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2011 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2012 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2013}
2014
2015static Constant* SegmentOffset(IRBuilder<> &IRB,
2016 unsigned Offset, unsigned AddressSpace) {
2017 return ConstantExpr::getIntToPtr(
2018 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2019 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2020}
2021
2022Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2023 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2024 // tcbhead_t; use it instead of the usual global variable (see
2025 // sysdeps/{i386,x86_64}/nptl/tls.h)
2026 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2027 if (Subtarget.isTargetFuchsia()) {
2028 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2029 return SegmentOffset(IRB, 0x10, getAddressSpace());
2030 } else {
2031 // %fs:0x28, unless we're using a Kernel code model, in which case
2032 // it's %gs:0x28. gs:0x14 on i386.
2033 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2034 return SegmentOffset(IRB, Offset, getAddressSpace());
2035 }
2036 }
2037
2038 return TargetLowering::getIRStackGuard(IRB);
2039}
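
// A standalone sketch (illustration only; x86-64 with a GNU toolchain assumed):
// the slot selected above for glibc targets is the same %fs:0x28 word that
// compiler-emitted stack-protector checks compare against. It can be read
// directly with GNU extended inline assembly:
static unsigned long readGlibcStackGuard() {
  unsigned long Guard;
  asm("mov %%fs:0x28, %0" : "=r"(Guard));
  return Guard;
}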
2040
2041void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2042 // MSVC CRT provides functionalities for stack protection.
2043 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
2044 // MSVC CRT has a global variable holding security cookie.
2045 M.getOrInsertGlobal("__security_cookie",
2046 Type::getInt8PtrTy(M.getContext()));
2047
2048 // MSVC CRT has a function to validate security cookie.
2049 auto *SecurityCheckCookie = cast<Function>(
2050 M.getOrInsertFunction("__security_check_cookie",
2051 Type::getVoidTy(M.getContext()),
2052 Type::getInt8PtrTy(M.getContext())));
2053 SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
2054 SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
2055 return;
2056 }
2057 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2058 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2059 return;
2060 TargetLowering::insertSSPDeclarations(M);
2061}
2062
2063Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2064 // MSVC CRT has a global variable holding security cookie.
2065 if (Subtarget.getTargetTriple().isOSMSVCRT())
2066 return M.getGlobalVariable("__security_cookie");
2067 return TargetLowering::getSDagStackGuard(M);
2068}
2069
2070Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2071 // MSVC CRT has a function to validate security cookie.
2072 if (Subtarget.getTargetTriple().isOSMSVCRT())
2073 return M.getFunction("__security_check_cookie");
2074 return TargetLowering::getSSPStackGuardCheck(M);
2075}
2076
2077Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2078 if (Subtarget.getTargetTriple().isOSContiki())
2079 return getDefaultSafeStackPointerLocation(IRB, false);
2080
2081 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2082 // definition of TLS_SLOT_SAFESTACK in
2083 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2084 if (Subtarget.isTargetAndroid()) {
2085 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2086 // %gs:0x24 on i386
2087 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2088 return SegmentOffset(IRB, Offset, getAddressSpace());
2089 }
2090
2091 // Fuchsia is similar.
2092 if (Subtarget.isTargetFuchsia()) {
2093 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2094 return SegmentOffset(IRB, 0x18, getAddressSpace());
2095 }
2096
2097 return TargetLowering::getSafeStackPointerLocation(IRB);
2098}
2099
2100bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2101 unsigned DestAS) const {
2102   assert(SrcAS != DestAS && "Expected different address spaces!");
2103
2104 return SrcAS < 256 && DestAS < 256;
2105}
2106
2107//===----------------------------------------------------------------------===//
2108// Return Value Calling Convention Implementation
2109//===----------------------------------------------------------------------===//
2110
2111#include "X86GenCallingConv.inc"
2112
2113bool X86TargetLowering::CanLowerReturn(
2114 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2115 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2116 SmallVector<CCValAssign, 16> RVLocs;
2117 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2118 return CCInfo.CheckReturn(Outs, RetCC_X86);
2119}
2120
2121const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2122 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2123 return ScratchRegs;
2124}
2125
2126 /// Lowers mask values (v*i1) to the local register values
2127/// \returns DAG node after lowering to register type
2128static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2129 const SDLoc &Dl, SelectionDAG &DAG) {
2130 EVT ValVT = ValArg.getValueType();
2131
2132 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2133 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2134 // Two stage lowering might be required
2135 // bitcast: v8i1 -> i8 / v16i1 -> i16
2136 // anyextend: i8 -> i32 / i16 -> i32
2137 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2138 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2139 if (ValLoc == MVT::i32)
2140 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2141 return ValToCopy;
2142 } else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2143 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2144 // One stage lowering is required
2145 // bitcast: v32i1 -> i32 / v64i1 -> i64
2146 return DAG.getBitcast(ValLoc, ValArg);
2147 } else
2148 return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
2149}
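
// A standalone sketch (illustration only) of the two-stage lowering described
// above, with a plain bool array standing in for a v8i1 value: first "bitcast"
// the eight lanes into an i8-sized payload, then widen it to the i32 location.
#include <cstdint>

static uint32_t packV8i1ToI32(const bool (&Mask)[8]) {
  uint8_t Bits = 0;                      // stage 1: v8i1 -> i8
  for (unsigned I = 0; I != 8; ++I)
    Bits |= static_cast<uint8_t>(Mask[I]) << I;
  return Bits;                           // stage 2: i8 -> i32 (any-extend)
}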
2150
2151/// Breaks v64i1 value into two registers and adds the new node to the DAG
2152static void Passv64i1ArgInRegs(
2153 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2154 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2155 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2156   assert((Subtarget.hasBWI() || Subtarget.hasBMI()) &&
2157          "Expected AVX512BW or AVX512BMI target!");
2158   assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2159   assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
2160   assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2161          "The value should reside in two registers");
2162
2163 // Before splitting the value we cast it to i64
2164 Arg = DAG.getBitcast(MVT::i64, Arg);
2165
2166 // Splitting the value into two i32 types
2167 SDValue Lo, Hi;
2168 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2169 DAG.getConstant(0, Dl, MVT::i32));
2170 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2171 DAG.getConstant(1, Dl, MVT::i32));
2172
2173 // Attach the two i32 types into corresponding registers
2174 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2175 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2176}
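
// A standalone sketch (illustration only) of the split performed above, and of
// the matching reassembly done later by getv64i1Argument: the 64 mask bits
// travel as two ordinary 32-bit register values (Lo = element 0, Hi = element 1).
#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> splitMask64(uint64_t Mask) {
  return {static_cast<uint32_t>(Mask), static_cast<uint32_t>(Mask >> 32)};
}

static uint64_t joinMask64(uint32_t Lo, uint32_t Hi) {
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}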
2177
2178SDValue
2179X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2180 bool isVarArg,
2181 const SmallVectorImpl<ISD::OutputArg> &Outs,
2182 const SmallVectorImpl<SDValue> &OutVals,
2183 const SDLoc &dl, SelectionDAG &DAG) const {
2184 MachineFunction &MF = DAG.getMachineFunction();
2185 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2186
2187 // In some cases we need to disable registers from the default CSR list.
2188 // For example, when they are used for argument passing.
2189 bool ShouldDisableCalleeSavedRegister =
2190 CallConv == CallingConv::X86_RegCall ||
2191 MF.getFunction()->hasFnAttribute("no_caller_saved_registers");
2192
2193 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2194 report_fatal_error("X86 interrupts may not return any value");
2195
2196 SmallVector<CCValAssign, 16> RVLocs;
2197 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2198 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2199
2200 SDValue Flag;
2201 SmallVector<SDValue, 6> RetOps;
2202 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2203 // Operand #1 = Bytes To Pop
2204 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2205 MVT::i32));
2206
2207 // Copy the result values into the output registers.
2208 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2209 ++I, ++OutsIndex) {
2210 CCValAssign &VA = RVLocs[I];
2211     assert(VA.isRegLoc() && "Can only return in registers!");
2212
2213 // Add the register to the CalleeSaveDisableRegs list.
2214 if (ShouldDisableCalleeSavedRegister)
2215 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2216
2217 SDValue ValToCopy = OutVals[OutsIndex];
2218 EVT ValVT = ValToCopy.getValueType();
2219
2220 // Promote values to the appropriate types.
2221 if (VA.getLocInfo() == CCValAssign::SExt)
2222 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2223 else if (VA.getLocInfo() == CCValAssign::ZExt)
2224 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2225 else if (VA.getLocInfo() == CCValAssign::AExt) {
2226 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2227 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2228 else
2229 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2230 }
2231 else if (VA.getLocInfo() == CCValAssign::BCvt)
2232 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2233
2234     assert(VA.getLocInfo() != CCValAssign::FPExt &&
2235            "Unexpected FP-extend for return value.");
2236
2237 // If this is x86-64, and we disabled SSE, we can't return FP values,
2238 // or SSE or MMX vectors.
2239 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2240 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2241 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2242 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2243 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2244 } else if (ValVT == MVT::f64 &&
2245 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2246 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2247 // llvm-gcc has never done it right and no one has noticed, so this
2248 // should be OK for now.
2249 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2250 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2251 }
2252
2253 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2254 // the RET instruction and handled by the FP Stackifier.
2255 if (VA.getLocReg() == X86::FP0 ||
2256 VA.getLocReg() == X86::FP1) {
2257 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2258 // change the value to the FP stack register class.
2259 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2260 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2261 RetOps.push_back(ValToCopy);
2262 // Don't emit a copytoreg.
2263 continue;
2264 }
2265
2266 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2267 // which is returned in RAX / RDX.
2268 if (Subtarget.is64Bit()) {
2269 if (ValVT == MVT::x86mmx) {
2270 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2271 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2272 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2273 ValToCopy);
2274 // If we don't have SSE2 available, convert to v4f32 so the generated
2275 // register is legal.
2276 if (!Subtarget.hasSSE2())
2277 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2278 }
2279 }
2280 }
2281
2282 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2283
2284 if (VA.needsCustom()) {
2285       assert(VA.getValVT() == MVT::v64i1 &&
2286              "Currently the only custom case is when we split v64i1 to 2 regs");
2287
2288 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2289 Subtarget);
2290
2291       assert(2 == RegsToPass.size() &&
2292              "Expecting two registers after Pass64BitArgInRegs");
2293
2294 // Add the second register to the CalleeSaveDisableRegs list.
2295 if (ShouldDisableCalleeSavedRegister)
2296 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2297 } else {
2298 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2299 }
2300
2301 // Add nodes to the DAG and add the values into the RetOps list
2302 for (auto &Reg : RegsToPass) {
2303 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2304 Flag = Chain.getValue(1);
2305 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2306 }
2307 }
2308
2309 // Swift calling convention does not require we copy the sret argument
2310 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2311
2312 // All x86 ABIs require that for returning structs by value we copy
2313 // the sret argument into %rax/%eax (depending on ABI) for the return.
2314 // We saved the argument into a virtual register in the entry block,
2315 // so now we copy the value out and into %rax/%eax.
2316 //
2317 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2318 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2319 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2320 // either case FuncInfo->setSRetReturnReg() will have been called.
2321 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2322 // When we have both sret and another return value, we should use the
2323 // original Chain stored in RetOps[0], instead of the current Chain updated
2324 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2325
2326 // For the case of sret and another return value, we have
2327 // Chain_0 at the function entry
2328 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2329 // If we use Chain_1 in getCopyFromReg, we will have
2330 // Val = getCopyFromReg(Chain_1)
2331 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2332
2333 // getCopyToReg(Chain_0) will be glued together with
2334 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2335 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2336 // Data dependency from Unit B to Unit A due to usage of Val in
2337 // getCopyToReg(Chain_1, Val)
2338 // Chain dependency from Unit A to Unit B
2339
2340 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2341 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2342 getPointerTy(MF.getDataLayout()));
2343
2344 unsigned RetValReg
2345 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2346 X86::RAX : X86::EAX;
2347 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2348 Flag = Chain.getValue(1);
2349
2350 // RAX/EAX now acts like a return value.
2351 RetOps.push_back(
2352 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2353
2354 // Add the returned register to the CalleeSaveDisableRegs list.
2355 if (ShouldDisableCalleeSavedRegister)
2356 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2357 }
2358
2359 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2360 const MCPhysReg *I =
2361 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2362 if (I) {
2363 for (; *I; ++I) {
2364 if (X86::GR64RegClass.contains(*I))
2365 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2366 else
2367         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2368 }
2369 }
2370
2371 RetOps[0] = Chain; // Update chain.
2372
2373 // Add the flag if we have it.
2374 if (Flag.getNode())
2375 RetOps.push_back(Flag);
2376
2377 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2378 if (CallConv == CallingConv::X86_INTR)
2379 opcode = X86ISD::IRET;
2380 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2381}
2382
2383bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2384 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2385 return false;
2386
2387 SDValue TCChain = Chain;
2388 SDNode *Copy = *N->use_begin();
2389 if (Copy->getOpcode() == ISD::CopyToReg) {
2390 // If the copy has a glue operand, we conservatively assume it isn't safe to
2391 // perform a tail call.
2392 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2393 return false;
2394 TCChain = Copy->getOperand(0);
2395 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2396 return false;
2397
2398 bool HasRet = false;
2399 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2400 UI != UE; ++UI) {
2401 if (UI->getOpcode() != X86ISD::RET_FLAG)
2402 return false;
2403 // If we are returning more than one value, we can definitely
2404     // not make a tail call; see PR19530.
2405 if (UI->getNumOperands() > 4)
2406 return false;
2407 if (UI->getNumOperands() == 4 &&
2408 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2409 return false;
2410 HasRet = true;
2411 }
2412
2413 if (!HasRet)
2414 return false;
2415
2416 Chain = TCChain;
2417 return true;
2418}
2419
2420EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2421 ISD::NodeType ExtendKind) const {
2422 MVT ReturnMVT = MVT::i32;
2423
2424 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2425 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2426 // The ABI does not require i1, i8 or i16 to be extended.
2427 //
2428 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2429 // always extending i8/i16 return values, so keep doing that for now.
2430 // (PR26665).
2431 ReturnMVT = MVT::i8;
2432 }
2433
2434 EVT MinVT = getRegisterType(Context, ReturnMVT);
2435 return VT.bitsLT(MinVT) ? MinVT : VT;
2436}
2437
2438/// Reads two 32 bit registers and creates a 64 bit mask value.
2439 /// \param VA The current 32 bit value that needs to be assigned.
2440 /// \param NextVA The next 32 bit value that needs to be assigned.
2441/// \param Root The parent DAG node.
2442 /// \param [in,out] InFlag Represents the SDValue in the parent DAG node for
2443 /// glue purposes. In case the DAG is already using
2444 /// a physical register instead of a virtual one, we should glue
2445 /// our new SDValue to the InFlag SDValue.
2446 /// \return a new SDValue of size 64 bit.
2447static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2448 SDValue &Root, SelectionDAG &DAG,
2449 const SDLoc &Dl, const X86Subtarget &Subtarget,
2450 SDValue *InFlag = nullptr) {
2451   assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
2452   assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2453   assert(VA.getValVT() == MVT::v64i1 &&
2454          "Expecting first location of 64 bit width type");
2455   assert(NextVA.getValVT() == VA.getValVT() &&
2456          "The locations should have the same type");
2457   assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2458          "The values should reside in two registers");
2459
2460 SDValue Lo, Hi;
2461 unsigned Reg;
2462 SDValue ArgValueLo, ArgValueHi;
2463
2464 MachineFunction &MF = DAG.getMachineFunction();
2465 const TargetRegisterClass *RC = &X86::GR32RegClass;
2466
2467 // Read a 32 bit value from the registers
2468 if (nullptr == InFlag) {
2469 // When no physical register is present,
2470 // create an intermediate virtual register
2471 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2472 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2473 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2474 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2475 } else {
2476 // When a physical register is available read the value from it and glue
2477 // the reads together.
2478 ArgValueLo =
2479 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2480 *InFlag = ArgValueLo.getValue(2);
2481 ArgValueHi =
2482 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2483 *InFlag = ArgValueHi.getValue(2);
2484 }
2485
2486 // Convert the i32 type into v32i1 type
2487 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2488
2489 // Convert the i32 type into v32i1 type
2490 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2491
2492 // Concatenate the two values together
2493 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2494}
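// Illustrative sketch (hypothetical helper, not from this file): the
// CONCAT_VECTORS above glues the two 32 bit halves into one 64 bit mask,
// with the first location (VA) supplying the low lanes and the second
// (NextVA) the high lanes. In plain integer terms, assuming little-endian
// lane numbering:
static inline unsigned long long concatMaskHalves(unsigned Lo, unsigned Hi) {
  // Hi occupies the upper 32 bits, Lo the lower 32 bits of the 64 bit mask.
  return (static_cast<unsigned long long>(Hi) << 32) | Lo;
}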
2495
2496 /// Lower a register of various sizes (8/16/32/64)
2497 /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
2498 /// \returns a DAG node containing the operand after lowering to mask type.
2499static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2500 const EVT &ValLoc, const SDLoc &Dl,
2501 SelectionDAG &DAG) {
2502 SDValue ValReturned = ValArg;
2503
2504 if (ValVT == MVT::v1i1)
2505 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2506
2507 if (ValVT == MVT::v64i1) {
2508 // On a 32 bit machine, this case is handled by getv64i1Argument.
2509 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
2510 // On a 64 bit machine, there is no need to truncate the value; only bitcast it.
2511 } else {
2512 MVT maskLen;
2513 switch (ValVT.getSimpleVT().SimpleTy) {
2514 case MVT::v8i1:
2515 maskLen = MVT::i8;
2516 break;
2517 case MVT::v16i1:
2518 maskLen = MVT::i16;
2519 break;
2520 case MVT::v32i1:
2521 maskLen = MVT::i32;
2522 break;
2523 default:
2524 llvm_unreachable("Expecting a vector of i1 types");
2525 }
2526
2527 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2528 }
2529 return DAG.getBitcast(ValVT, ValReturned);
2530}
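// Illustrative sketch (hypothetical helper, not from this file): the
// truncate-then-bitcast sequence above keeps only as many low bits of the
// location value as the mask has lanes. For the v16i1 case, assuming the
// location arrived as a wider integer:
static inline unsigned short liveV16i1Bits(unsigned long long LocVal) {
  // Only the low 16 bits of the (possibly wider) location carry mask lanes.
  return static_cast<unsigned short>(LocVal & 0xFFFFu);
}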
2531
2532/// Lower the result values of a call into the
2533/// appropriate copies out of appropriate physical registers.
2534///
2535SDValue X86TargetLowering::LowerCallResult(
2536 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2537 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2538 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2539 uint32_t *RegMask) const {
2540
2541 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2542 // Assign locations to each value returned by this call.
2543 SmallVector<CCValAssign, 16> RVLocs;
2544 bool Is64Bit = Subtarget.is64Bit();
2545 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2546 *DAG.getContext());
2547 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2548
2549 // Copy all of the result registers out of their specified physreg.
2550 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2551 ++I, ++InsIndex) {
2552 CCValAssign &VA = RVLocs[I];
2553 EVT CopyVT = VA.getLocVT();
2554
2555 // In some calling conventions we need to remove the used registers
2556 // from the register mask.
2557 if (RegMask) {
2558 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2559 SubRegs.isValid(); ++SubRegs)
2560 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2561 }
2562
2563 // If this is x86-64, and we disabled SSE, we can't return FP values
2564 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2565 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2566 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2567 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2568 }
2569
2570 // If we prefer to use the value in xmm registers, copy it out as f80 and
2571 // use a truncate to move it from fp stack reg to xmm reg.
2572 bool RoundAfterCopy = false;
2573 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2574 isScalarFPTypeInSSEReg(VA.getValVT())) {
2575 if (!Subtarget.hasX87())
2576 report_fatal_error("X87 register return with X87 disabled");
2577 CopyVT = MVT::f80;
2578 RoundAfterCopy = (CopyVT != VA.getLocVT());
2579 }
2580
2581 SDValue Val;
2582 if (VA.needsCustom()) {
2583 assert(VA.getValVT() == MVT::v64i1 &&
2584 "Currently the only custom case is when we split v64i1 to 2 regs");
2585 Val =
2586 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2587 } else {
2588 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2589 .getValue(1);
2590 Val = Chain.getValue(0);
2591 InFlag = Chain.getValue(2);
2592 }
2593
2594 if (RoundAfterCopy)
2595 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2596 // This truncation won't change the value.
2597 DAG.getIntPtrConstant(1, dl));
2598
2599 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2600 if (VA.getValVT().isVector() &&
2601 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2602 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2603 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2604 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2605 } else
2606 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2607 }
2608
2609 InVals.push_back(Val);
2610 }
2611
2612 return Chain;
2613}
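// Illustrative sketch (hypothetical helper, not from this file): the
// register-mask update in LowerCallResult packs one bit per physical register
// into 32 bit words; clearing register R means clearing bit R % 32 of word
// R / 32. The same arithmetic, standalone:
static inline void clearRegFromMask(unsigned *RegMask, unsigned Reg) {
  // Word Reg / 32 holds the bit for this register; clear bit Reg % 32 in it.
  RegMask[Reg / 32] &= ~(1u << (Reg % 32));
}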
2614
2615//===----------------------------------------------------------------------===//
2616// C & StdCall & Fast Calling Convention implementation
2617//===----------------------------------------------------------------------===//
2618 // The StdCall calling convention is the standard for many Windows API
2619 // routines. It differs from the C calling convention only slightly: the
2620 // callee, not the caller, cleans up the stack, and symbols are decorated
2621 // in a particular way. It does not support any vector arguments.
2622 // For info on the fast calling convention, see the Fast Calling Convention
2623 // (tail call) implementation in LowerX86_32FastCCCallTo.
2624
2625/// CallIsStructReturn - Determines whether a call uses struct return
2626/// semantics.
2627enum StructReturnType {
2628 NotStructReturn,
2629 RegStructReturn,
2630 StackStructReturn
2631};
2632static StructReturnType
2633callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
2634 if (Outs.empty())
2635 return NotStructReturn;
2636
2637 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2638 if (!Flags.isSRet())
2639 return NotStructReturn;
2640 if (Flags.isInReg() || IsMCU)
2641 return RegStructReturn;
2642 return StackStructReturn;
2643}
2644
2645/// Determines whether a function uses struct return semantics.
2646static StructReturnType
2647argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
2648 if (Ins.empty())
2649 return NotStructReturn;
2650
2651 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2652 if (!Flags.isSRet())
2653 return NotStructReturn;
2654 if (Flags.isInReg() || IsMCU)
2655 return RegStructReturn;
2656 return StackStructReturn;
2657}
2658
2659/// Make a copy of an aggregate at address specified by "Src" to address
2660/// "Dst" with size and alignment information specified by the specific
2661/// parameter attribute. The copy will be passed as a byval function parameter.
2662static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2663 SDValue Chain, ISD::ArgFlagsTy Flags,
2664 SelectionDAG &DAG, const SDLoc &dl) {
2665 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2666
2667 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2668 /*isVolatile*/false, /*AlwaysInline=*/true,
2669 /*isTailCall*/false,
2670 MachinePointerInfo(), MachinePointerInfo());
2671}
2672
2673/// Return true if the calling convention is one that we can guarantee TCO for.
2674static bool canGuaranteeTCO(CallingConv::ID CC) {
2675 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2676 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2677 CC == CallingConv::HHVM);
2678}
2679
2680/// Return true if we might ever do TCO for calls with this calling convention.
2681static bool mayTailCallThisCC(CallingConv::ID CC) {
2682 switch (CC) {
2683 // C calling conventions:
2684 case CallingConv::C:
2685 case CallingConv::Win64:
2686 case CallingConv::X86_64_SysV:
2687 // Callee pop conventions:
2688 case CallingConv::X86_ThisCall:
2689 case CallingConv::X86_StdCall:
2690 case CallingConv::X86_VectorCall:
2691 case CallingConv::X86_FastCall:
2692 return true;
2693 default:
2694 return canGuaranteeTCO(CC);
2695 }
2696}
2697
2698/// Return true if the function is being made into a tailcall target by
2699/// changing its ABI.
2700static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2701 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2702}
2703
2704bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2705 auto Attr =
2706 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2707 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2708 return false;
2709
2710 ImmutableCallSite CS(CI);
2711 CallingConv::ID CalleeCC = CS.getCallingConv();
2712 if (!mayTailCallThisCC(CalleeCC))
2713 return false;
2714
2715 return true;
2716}
2717
2718SDValue
2719X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2720 const SmallVectorImpl<ISD::InputArg> &Ins,
2721 const SDLoc &dl, SelectionDAG &DAG,
2722 const CCValAssign &VA,
2723 MachineFrameInfo &MFI, unsigned i) const {
2724 // Create the nodes corresponding to a load from this parameter slot.
2725 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2726 bool AlwaysUseMutable = shouldGuaranteeTCO(
2727 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2728 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2729 EVT ValVT;
2730 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2731
2732 // If the value is passed by pointer, we have the address passed instead of
2733 // the value itself. There is no need to extend if the mask value and the
2734 // location share the same absolute size.
2735 bool ExtendedInMem =
2736 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2737 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2738
2739 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2740 ValVT = VA.getLocVT();
2741 else
2742 ValVT = VA.getValVT();
2743
2744 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2745 // taken by a return address.
2746 int Offset = 0;
2747 if (CallConv == CallingConv::X86_INTR) {
2748 // X86 interrupts may take one or two arguments.
2749 // On the stack there will be no return address as there is in a regular call.
2750 // The offset of the last argument needs to be set to -4/-8 bytes,
2751 // while the offset of the first argument (when there are two) should be 0 bytes.
2752 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2753 if (Subtarget.is64Bit() && Ins.size() == 2) {
2754 // The stack pointer needs to be realigned for 64 bit handlers with error
2755 // code, so the argument offset changes by 8 bytes.
2756 Offset += 8;
2757 }
2758 }
2759
2760 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2761 // changed with more analysis.
2762 // In case of tail call optimization, mark all arguments mutable, since they
2763 // could be overwritten by the lowering of arguments for a tail call.
2764 if (Flags.isByVal()) {
2765 unsigned Bytes = Flags.getByValSize();
2766 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2767 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2768 // Adjust SP offset of interrupt parameter.
2769 if (CallConv == CallingConv::X86_INTR) {
2770 MFI.setObjectOffset(FI, Offset);
2771 }
2772 return DAG.getFrameIndex(FI, PtrVT);
2773 }
2774
2775 // This is an argument in memory. We might be able to perform copy elision.
2776 if (Flags.isCopyElisionCandidate()) {
2777 EVT ArgVT = Ins[i].ArgVT;
2778 SDValue PartAddr;
2779 if (Ins[i].PartOffset == 0) {
2780 // If this is a one-part value or the first part of a multi-part value,
2781 // create a stack object for the entire argument value type and return a
2782 // load from our portion of it. This assumes that if the first part of an
2783 // argument is in memory, the rest will also be in memory.
2784 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
2785 /*Immutable=*/false);
2786 PartAddr = DAG.getFrameIndex(FI, PtrVT);
2787 return DAG.getLoad(
2788 ValVT, dl, Chain, PartAddr,
2789 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2790 } else {
2791 // This is not the first piece of an argument in memory. See if there is
2792 // already a fixed stack object including this offset. If so, assume it
2793 // was created by the PartOffset == 0 branch above and create a load from
2794 // the appropriate offset into it.
2795 int64_t PartBegin = VA.getLocMemOffset();
2796 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
2797 int FI = MFI.getObjectIndexBegin();
2798 for (; MFI.isFixedObjectIndex(FI); ++FI) {
2799 int64_t ObjBegin = MFI.getObjectOffset(FI);
2800 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
2801 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
2802 break;
2803 }
2804 if (MFI.isFixedObjectIndex(FI)) {
2805 SDValue Addr =
2806 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
2807 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
2808 return DAG.getLoad(
2809 ValVT, dl, Chain, Addr,
2810 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
2811 Ins[i].PartOffset));
2812 }
2813 }
2814 }
2815
2816 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2817 VA.getLocMemOffset(), isImmutable);
2818
2819 // Set SExt or ZExt flag.
2820 if (VA.getLocInfo() == CCValAssign::ZExt) {
2821 MFI.setObjectZExt(FI, true);
2822 } else if (VA.getLocInfo() == CCValAssign::SExt) {
2823 MFI.setObjectSExt(FI, true);
2824 }
2825
2826 // Adjust SP offset of interrupt parameter.
2827 if (CallConv == CallingConv::X86_INTR) {
2828 MFI.setObjectOffset(FI, Offset);
2829 }
2830
2831 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2832 SDValue Val = DAG.getLoad(
2833 ValVT, dl, Chain, FIN,
2834 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2835 return ExtendedInMem
2836 ? (VA.getValVT().isVector()
2837 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
2838 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
2839 : Val;
2840}
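// Illustrative sketch (hypothetical helper, not from this file) mirroring the
// X86_INTR offset computation above. For a 32 bit handler with two arguments:
// i = 0 gives 4 * ((1 % 2) - 1) = 0 and i = 1 gives 4 * ((2 % 2) - 1) = -4,
// matching "first argument at 0, error code at -4/-8".
static inline int interruptArgOffset(unsigned i, unsigned NumArgs,
                                     bool Is64Bit) {
  int SlotSize = Is64Bit ? 8 : 4;
  int Offset = SlotSize * (static_cast<int>((i + 1) % NumArgs) - 1);
  if (Is64Bit && NumArgs == 2)
    Offset += 8; // Realignment for 64 bit handlers with an error code.
  return Offset;
}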
2841
2842// FIXME: Get this from tablegen.
2843static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2844 const X86Subtarget &Subtarget) {
2845 assert(Subtarget.is64Bit());
2846
2847 if (Subtarget.isCallingConvWin64(CallConv)) {
2848 static const MCPhysReg GPR64ArgRegsWin64[] = {
2849 X86::RCX, X86::RDX, X86::R8, X86::R9
2850 };
2851 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2852 }
2853
2854 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2855 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2856 };
2857 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2858}
2859
2860// FIXME: Get this from tablegen.
2861static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2862 CallingConv::ID CallConv,
2863 const X86Subtarget &Subtarget) {
2864 assert(Subtarget.is64Bit());
2865 if (Subtarget.isCallingConvWin64(CallConv)) {
2866 // The XMM registers which might contain var arg parameters are shadowed
2867 // in their paired GPR. So we only need to save the GPR to their home
2868 // slots.
2869 // TODO: __vectorcall will change this.
2870 return None;
2871 }
2872
2873 const Function *Fn = MF.getFunction();
2874 bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
2875 bool isSoftFloat = Subtarget.useSoftFloat();
2876 assert(!(isSoftFloat && NoImplicitFloatOps) &&
2877 "SSE register cannot be used when SSE is disabled!");
2878 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
2879 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2880 // registers.
2881 return None;
2882
2883 static const MCPhysReg XMMArgRegs64Bit[] = {
2884 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2885 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2886 };
2887 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2888}
2889
2890#ifndef NDEBUG
2891static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
2892 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
2893 [](const CCValAssign &A, const CCValAssign &B) -> bool {
2894 return A.getValNo() < B.getValNo();
2895 });
2896}
2897#endif
2898
2899SDValue X86TargetLowering::LowerFormalArguments(
2900 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2901 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2902 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2903 MachineFunction &MF = DAG.getMachineFunction();
2904 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2905 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
2906
2907 const Function *Fn = MF.getFunction();
2908 if (Fn->hasExternalLinkage() &&
2909 Subtarget.isTargetCygMing() &&
2910 Fn->getName() == "main")
2911 FuncInfo->setForceFramePointer(true);
2912
2913 MachineFrameInfo &MFI = MF.getFrameInfo();
2914 bool Is64Bit = Subtarget.is64Bit();
2915 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2916
2917 assert(
2918 !(isVarArg && canGuaranteeTCO(CallConv)) &&
2919 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
2920
2921 if (CallConv == CallingConv::X86_INTR) {
2922 bool isLegal = Ins.size() == 1 ||
2923 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
2924 (!Is64Bit && Ins[1].VT == MVT::i32)));
2925 if (!isLegal)
2926 report_fatal_error("X86 interrupts may take one or two arguments");
2927 }
2928
2929 // Assign locations to all of the incoming arguments.
2930 SmallVector<CCValAssign, 16> ArgLocs;
2931 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2932
2933 // Allocate shadow area for Win64.
2934 if (IsWin64)
2935 CCInfo.AllocateStack(32, 8);
2936
2937 CCInfo.AnalyzeArguments(Ins, CC_X86);
2938
2939 // In vectorcall calling convention a second pass is required for the HVA
2940 // types.
2941 if (CallingConv::X86_VectorCall == CallConv) {
2942 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
2943 }
2944
2945 // The next loop assumes that the locations are in the same order as the
2946 // input arguments.
2947 assert(isSortedByValueNo(ArgLocs) &&
2948 "Argument Location list must be sorted before lowering");
2949
2950 SDValue ArgValue;
2951 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
2952 ++I, ++InsIndex) {
2953 assert(InsIndex < Ins.size() && "Invalid Ins index");
2954 CCValAssign &VA = ArgLocs[I];
2955
2956 if (VA.isRegLoc()) {
2957 EVT RegVT = VA.getLocVT();
2958 if (VA.needsCustom()) {
2959 assert(
2960 VA.getValVT() == MVT::v64i1 &&
2961 "Currently the only custom case is when we split v64i1 to 2 regs");
2962
2963 // In the regcall calling convention, v64i1 values compiled
2964 // for a 32 bit arch are split up into two registers.
2965 ArgValue =
2966 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
2967 } else {
2968 const TargetRegisterClass *RC;
2969 if (RegVT == MVT::i32)
2970 RC = &X86::GR32RegClass;
2971 else if (Is64Bit && RegVT == MVT::i64)
2972 RC = &X86::GR64RegClass;
2973 else if (RegVT == MVT::f32)
2974 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
2975 else if (RegVT == MVT::f64)
2976 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
2977 else if (RegVT == MVT::f80)
2978 RC = &X86::RFP80RegClass;
2979 else if (RegVT == MVT::f128)
2980 RC = &X86::FR128RegClass;
2981 else if (RegVT.is512BitVector())
2982 RC = &X86::VR512RegClass;
2983 else if (RegVT.is256BitVector())
2984 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
2985 else if (RegVT.is128BitVector())
2986 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
2987 else if (RegVT == MVT::x86mmx)
2988 RC = &X86::VR64RegClass;
2989 else if (RegVT == MVT::v1i1)
2990 RC = &X86::VK1RegClass;
2991 else if (RegVT == MVT::v8i1)
2992 RC = &X86::VK8RegClass;
2993 else if (RegVT == MVT::v16i1)
2994 RC = &X86::VK16RegClass;
2995 else if (RegVT == MVT::v32i1)
2996 RC = &X86::VK32RegClass;
2997 else if (RegVT == MVT::v64i1)
2998 RC = &X86::VK64RegClass;
2999 else
3000 llvm_unreachable("Unknown argument type!");
3001
3002 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3003 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3004 }
3005
3006 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3007 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3008 // right size.
3009 if (VA.getLocInfo() == CCValAssign::SExt)
3010 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3011 DAG.getValueType(VA.getValVT()));
3012 else if (VA.getLocInfo() == CCValAssign::ZExt)
3013 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3014 DAG.getValueType(VA.getValVT()));
3015 else if (VA.getLocInfo() == CCValAssign::BCvt)
3016 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3017
3018 if (VA.isExtInLoc()) {
3019 // Handle MMX values passed in XMM regs.
3020 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3021 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3022 else if (VA.getValVT().isVector() &&
3023 VA.getValVT().getScalarType() == MVT::i1 &&
3024 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3025 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3026 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3027 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3028 } else
3029 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3030 }
3031 } else {
3032 assert(VA.isMemLoc());
3033 ArgValue =
3034 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3035 }
3036
3037 // If the value is passed via a pointer, do a load.
3038 if (VA.getLocInfo() == CCValAssign::Indirect)
3039 ArgValue =
3040 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3041
3042 InVals.push_back(ArgValue);
3043 }
3044
3045 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3046 // Swift calling convention does not require we copy the sret argument
3047 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3048 if (CallConv == CallingConv::Swift)
3049 continue;
3050
3051 // All x86 ABIs require that for returning structs by value we copy the
3052 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3053 // the argument into a virtual register so that we can access it from the
3054 // return points.
3055 if (Ins[I].Flags.isSRet()) {
3056 unsigned Reg = FuncInfo->getSRetReturnReg();
3057 if (!Reg) {
3058 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3059 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3060 FuncInfo->setSRetReturnReg(Reg);
3061 }
3062 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3063 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3064 break;
3065 }
3066 }
3067
3068 unsigned StackSize = CCInfo.getNextStackOffset();
3069 // Align stack specially for tail calls.
3070 if (shouldGuaranteeTCO(CallConv,
3071 MF.getTarget().Options.GuaranteedTailCallOpt))
3072 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3073
3074 // If the function takes variable number of arguments, make a frame index for
3075 // the start of the first vararg value... for expansion of llvm.va_start. We
3076 // can skip this if there are no va_start calls.
3077 if (MFI.hasVAStart() &&
3078 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3079 CallConv != CallingConv::X86_ThisCall))) {
3080 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3081 }
3082
3083 // Figure out if XMM registers are in use.
3084 assert(!(Subtarget.useSoftFloat() &&
3085 Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
3086 "SSE register cannot be used when SSE is disabled!");
3087
3088 // 64-bit calling conventions support varargs and register parameters, so we
3089 // have to do extra work to spill them in the prologue.
3090 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3091 // Find the first unallocated argument registers.
3092 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3093 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3094 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3095 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3096 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3097 "SSE register cannot be used when SSE is disabled!");
3098
3099 // Gather all the live in physical registers.
3100 SmallVector<SDValue, 6> LiveGPRs;
3101 SmallVector<SDValue, 8> LiveXMMRegs;
3102 SDValue ALVal;
3103 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3104 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3105 LiveGPRs.push_back(
3106 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3107 }
3108 if (!ArgXMMs.empty()) {
3109 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3110 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3111 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3112 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3113 LiveXMMRegs.push_back(
3114 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3115 }
3116 }
3117
3118 if (IsWin64) {
3119 // Get to the caller-allocated home save location. Add 8 to account
3120 // for the return address.
3121 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3122 FuncInfo->setRegSaveFrameIndex(
3123 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3124 // Fixup to set vararg frame on shadow area (4 x i64).
3125 if (NumIntRegs < 4)
3126 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3127 } else {
3128 // For X86-64, if there are vararg parameters that are passed via
3129 // registers, then we must store them to their spots on the stack so
3130 // they may be loaded by dereferencing the result of va_next.
3131 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3132 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3133 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3134 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3135 }
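// Editorial worked example (assuming the standard SysV AMD64 layout used in
// the else branch above): with 6 GPR and 8 XMM argument registers, a variadic
// function whose named arguments consume 2 GPRs and 1 XMM register gets
//   VarArgsGPOffset = 2 * 8 = 16
//   VarArgsFPOffset = 6 * 8 + 1 * 16 = 64
// and a register save area of 6 * 8 + 8 * 16 = 176 bytes, matching the
// CreateStackObject call above.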
3136
3137 // Store the integer parameter registers.
3138 SmallVector<SDValue, 8> MemOps;
3139 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3140 getPointerTy(DAG.getDataLayout()));
3141 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3142 for (SDValue Val : LiveGPRs) {
3143 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3144 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3145 SDValue Store =
3146 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3147 MachinePointerInfo::getFixedStack(
3148 DAG.getMachineFunction(),
3149 FuncInfo->getRegSaveFrameIndex(), Offset));
3150 MemOps.push_back(Store);
3151 Offset += 8;
3152 }
3153
3154 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3155 // Now store the XMM (fp + vector) parameter registers.
3156 SmallVector<SDValue, 12> SaveXMMOps;
3157 SaveXMMOps.push_back(Chain);
3158 SaveXMMOps.push_back(ALVal);
3159 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3160 FuncInfo->getRegSaveFrameIndex(), dl));
3161 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3162 FuncInfo->getVarArgsFPOffset(), dl));
3163 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3164 LiveXMMRegs.end());
3165 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3166 MVT::Other, SaveXMMOps));
3167 }
3168
3169 if (!MemOps.empty())
3170 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3171 }
3172
3173 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3174 // Find the largest legal vector type.
3175 MVT VecVT = MVT::Other;
3176 // FIXME: Only some x86_32 calling conventions support AVX512.
3177 if (Subtarget.hasAVX512() &&
3178 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3179 CallConv == CallingConv::Intel_OCL_BI)))
3180 VecVT = MVT::v16f32;
3181 else if (Subtarget.hasAVX())
3182 VecVT = MVT::v8f32;
3183 else if (Subtarget.hasSSE2())
3184 VecVT = MVT::v4f32;
3185
3186 // We forward some GPRs and some vector types.
3187 SmallVector<MVT, 2> RegParmTypes;
3188 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3189 RegParmTypes.push_back(IntVT);
3190 if (VecVT != MVT::Other)
3191 RegParmTypes.push_back(VecVT);
3192
3193 // Compute the set of forwarded registers. The rest are scratch.
3194 SmallVectorImpl<ForwardedRegister> &Forwards =
3195 FuncInfo->getForwardedMustTailRegParms();
3196 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3197
3198 // Conservatively forward AL on x86_64, since it might be used for varargs.
3199 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3200 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3201 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3202 }
3203
3204 // Copy all forwards from physical to virtual registers.
3205 for (ForwardedRegister &F : Forwards) {
3206 // FIXME: Can we use a less constrained schedule?
3207 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3208 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
3209 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
3210 }
3211 }
3212
3213 // Some CCs need callee pop.
3214 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3215 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3216 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3217 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3218 // X86 interrupts must pop the error code (and the alignment padding) if
3219 // present.
3220 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3221 } else {
3222 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3223 // If this is an sret function, the return should pop the hidden pointer.
3224 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3225 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3226 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3227 FuncInfo->setBytesToPopOnReturn(4);
3228 }
3229
3230 if (!Is64Bit) {
3231 // RegSaveFrameIndex is X86-64 only.
3232 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3233 if (CallConv == CallingConv::X86_FastCall ||
3234 CallConv == CallingConv::X86_ThisCall)
3235 // fastcc functions can't have varargs.
3236 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3237 }
3238
3239 FuncInfo->setArgumentStackSize(StackSize);
3240
3241 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3242 EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
3243 if (Personality == EHPersonality::CoreCLR) {
3244 assert(Is64Bit);
3245 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3246 // that we'd prefer this slot be allocated towards the bottom of the frame
3247 // (i.e. near the stack pointer after allocating the frame). Every
3248 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3249 // offset from the bottom of this and each funclet's frame must be the
3250 // same, so the size of funclets' (mostly empty) frames is dictated by
3251 // how far this slot is from the bottom (since they allocate just enough
3252 // space to accommodate holding this slot at the correct offset).
3253 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3254 EHInfo->PSPSymFrameIdx = PSPSymFI;
3255 }
3256 }
3257
3258 if (CallConv == CallingConv::X86_RegCall ||
3259 Fn->hasFnAttribute("no_caller_saved_registers")) {
3260 MachineRegisterInfo &MRI = MF.getRegInfo();
3261 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3262 MRI.disableCalleeSavedRegister(Pair.first);
3263 }
3264
3265 return Chain;
3266}
3267
3268SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3269 SDValue Arg, const SDLoc &dl,
3270 SelectionDAG &DAG,
3271 const CCValAssign &VA,
3272 ISD::ArgFlagsTy Flags) const {
3273 unsigned LocMemOffset = VA.getLocMemOffset();
3274 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3275 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3276 StackPtr, PtrOff);
3277 if (Flags.isByVal())
3278 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3279
3280 return DAG.getStore(
3281 Chain, dl, Arg, PtrOff,
3282 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3283}
3284
3285/// Emit a load of return address if tail call
3286/// optimization is performed and it is required.
3287SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3288 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3289 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3290 // Adjust the Return address stack slot.
3291 EVT VT = getPointerTy(DAG.getDataLayout());
3292 OutRetAddr = getReturnAddressFrameIndex(DAG);
3293
3294 // Load the "old" Return address.
3295 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3296 return SDValue(OutRetAddr.getNode(), 1);
3297}
3298
3299/// Emit a store of the return address if tail call
3300/// optimization is performed and it is required (FPDiff!=0).
3301static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3302 SDValue Chain, SDValue RetAddrFrIdx,
3303 EVT PtrVT, unsigned SlotSize,
3304 int FPDiff, const SDLoc &dl) {
3305 // Store the return address to the appropriate stack slot.
3306 if (!FPDiff) return Chain;
3307 // Calculate the new stack slot for the return address.
3308 int NewReturnAddrFI =
3309 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3310 false);
3311 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3312 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3313 MachinePointerInfo::getFixedStack(
3314 DAG.getMachineFunction(), NewReturnAddrFI));
3315 return Chain;
3316}
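// Editorial worked example: the fixed object above is created at offset
// FPDiff - SlotSize, so with FPDiff = -8 and an 8 byte slot the relocated
// return address slot lands at offset -16. FPDiff itself is computed in
// LowerCall below as NumBytesCallerPushed - NumBytes.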
3317
3318 /// Returns a vector_shuffle mask for a movs{s|d}, movd
3319 /// operation of the specified width.
3320static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3321 SDValue V2) {
3322 unsigned NumElems = VT.getVectorNumElements();
3323 SmallVector<int, 8> Mask;
3324 Mask.push_back(NumElems);
3325 for (unsigned i = 1; i != NumElems; ++i)
3326 Mask.push_back(i);
3327 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3328}
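// Illustrative sketch (hypothetical helper, not from this file): for a
// 4 element type the mask built above is {4, 1, 2, 3}. Indices >= NumElems
// select from V2, so lane 0 comes from V2 and lanes 1-3 from V1, which is the
// movss/movsd pattern. The same mask construction, standalone:
static inline void movlMask(unsigned NumElems, int *Mask) {
  Mask[0] = static_cast<int>(NumElems);  // lane 0 taken from V2
  for (unsigned i = 1; i != NumElems; ++i)
    Mask[i] = static_cast<int>(i);       // remaining lanes taken from V1
}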
3329
3330SDValue
3331X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3332 SmallVectorImpl<SDValue> &InVals) const {
3333 SelectionDAG &DAG = CLI.DAG;
3334 SDLoc &dl = CLI.DL;
3335 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3336 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3337 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3338 SDValue Chain = CLI.Chain;
3339 SDValue Callee = CLI.Callee;
3340 CallingConv::ID CallConv = CLI.CallConv;
3341 bool &isTailCall = CLI.IsTailCall;
3342 bool isVarArg = CLI.IsVarArg;
3343
3344 MachineFunction &MF = DAG.getMachineFunction();
3345 bool Is64Bit = Subtarget.is64Bit();
3346 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3347 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3348 bool IsSibcall = false;
3349 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3350 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
3351 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3352 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3353 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3354 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3355
3356 if (CallConv == CallingConv::X86_INTR)
3357 report_fatal_error("X86 interrupts may not be called directly");
3358
3359 if (Attr.getValueAsString() == "true")
3360 isTailCall = false;
3361
3362 if (Subtarget.isPICStyleGOT() &&
3363 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3364 // If we are using a GOT, disable tail calls to external symbols with
3365 // default visibility. Tail calling such a symbol requires using a GOT
3366 // relocation, which forces early binding of the symbol. This breaks code
3367 // that requires lazy function symbol resolution. Using musttail or
3368 // GuaranteedTailCallOpt will override this.
3369 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3370 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3371 G->getGlobal()->hasDefaultVisibility()))
3372 isTailCall = false;
3373 }
3374
3375 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3376 if (IsMustTail) {
3377 // Force this to be a tail call. The verifier rules are enough to ensure
3378 // that we can lower this successfully without moving the return address
3379 // around.
3380 isTailCall = true;
3381 } else if (isTailCall) {
3382 // Check if it's really possible to do a tail call.
3383 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3384 isVarArg, SR != NotStructReturn,
3385 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
3386 Outs, OutVals, Ins, DAG);
3387
3388 // Sibcalls are automatically detected tailcalls which do not require
3389 // ABI changes.
3390 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3391 IsSibcall = true;
3392
3393 if (isTailCall)
3394 ++NumTailCalls;
3395 }
3396
3397 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3398 "Var args not supported with calling convention fastcc, ghc or hipe");
3399
3400 // Analyze operands of the call, assigning locations to each operand.
3401 SmallVector<CCValAssign, 16> ArgLocs;
3402 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3403
3404 // Allocate shadow area for Win64.
3405 if (IsWin64)
3406 CCInfo.AllocateStack(32, 8);
3407
3408 CCInfo.AnalyzeArguments(Outs, CC_X86);
3409
3410 // In vectorcall calling convention a second pass is required for the HVA
3411 // types.
3412 if (CallingConv::X86_VectorCall == CallConv) {
3413 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3414 }
3415
3416 // Get a count of how many bytes are to be pushed on the stack.
3417 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3418 if (IsSibcall)
3419 // This is a sibcall. The memory operands are already available in the
3420 // caller's incoming argument area (its own caller's stack frame).
3421 NumBytes = 0;
3422 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3423 canGuaranteeTCO(CallConv))
3424 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3425
3426 int FPDiff = 0;
3427 if (isTailCall && !IsSibcall && !IsMustTail) {
3428 // Lower arguments at fp - stackoffset + fpdiff.
3429 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3430
3431 FPDiff = NumBytesCallerPushed - NumBytes;
3432
3433 // Set the delta of the movement of the return address stack slot,
3434 // but only if the delta is greater than the previous delta.
3435 if (FPDiff < X86Info->getTCReturnAddrDelta())
3436 X86Info->setTCReturnAddrDelta(FPDiff);
3437 }
3438
3439 unsigned NumBytesToPush = NumBytes;
3440 unsigned NumBytesToPop = NumBytes;
3441
3442 // If we have an inalloca argument, all stack space has already been allocated
3443 // for us and is right at the top of the stack. We don't support multiple
3444 // arguments passed in memory when using inalloca.
3445 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3446 NumBytesToPush = 0;
3447 if (!ArgLocs.back().isMemLoc())
3448 report_fatal_error("cannot use inalloca attribute on a register "
3449 "parameter");
3450 if (ArgLocs.back().getLocMemOffset() != 0)
3451 report_fatal_error("any parameter with the inalloca attribute must be "
3452 "the only memory argument");
3453 }
3454
3455 if (!IsSibcall)
3456 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3457 NumBytes - NumBytesToPush, dl);
3458
3459 SDValue RetAddrFrIdx;
3460 // Load return address for tail calls.
3461 if (isTailCall && FPDiff)
3462 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3463 Is64Bit, FPDiff, dl);
3464
3465 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3466 SmallVector<SDValue, 8> MemOpChains;
3467 SDValue StackPtr;
3468
3469 // The next loop assumes that the locations are in the same order as the
3470 // input arguments.
3471 assert(isSortedByValueNo(ArgLocs) &&
3472 "Argument Location list must be sorted before lowering");
3473
3474 // Walk the register/memloc assignments, inserting copies/loads. In the case
3475 // of tail call optimization, arguments are handled later.
3476 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3477 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3478 ++I, ++OutIndex) {
3479 assert(OutIndex < Outs.size() && "Invalid Out index");
3480 // Skip inalloca arguments, they have already been written.
3481 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3482 if (Flags.isInAlloca())
3483 continue;
3484
3485 CCValAssign &VA = ArgLocs[I];
3486 EVT RegVT = VA.getLocVT();
3487 SDValue Arg = OutVals[OutIndex];
3488 bool isByVal = Flags.isByVal();
3489
3490 // Promote the value if needed.
3491 switch (VA.getLocInfo()) {
3492 default: llvm_unreachable("Unknown loc info!");
3493 case CCValAssign::Full: break;
3494 case CCValAssign::SExt:
3495 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3496 break;
3497 case CCValAssign::ZExt:
3498 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3499 break;
3500 case CCValAssign::AExt:
3501 if (Arg.getValueType().isVector() &&
3502 Arg.getValueType().getVectorElementType() == MVT::i1)
3503 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3504 else if (RegVT.is128BitVector()) {
3505 // Special case: passing MMX values in XMM registers.
3506 Arg = DAG.getBitcast(MVT::i64, Arg);
3507 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3508 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3509 } else
3510 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3511 break;
3512 case CCValAssign::BCvt:
3513 Arg = DAG.getBitcast(RegVT, Arg);
3514 break;
3515 case CCValAssign::Indirect: {
3516 // Store the argument.
3517 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3518 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3519 Chain = DAG.getStore(
3520 Chain, dl, Arg, SpillSlot,
3521 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3522 Arg = SpillSlot;
3523 break;
3524 }
3525 }
3526
3527 if (VA.needsCustom()) {
3528 assert(VA.getValVT() == MVT::v64i1 &&
3529 "Currently the only custom case is when we split v64i1 to 2 regs");
3530 // Split v64i1 value into two registers
3531 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3532 Subtarget);
3533 } else if (VA.isRegLoc()) {
3534 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3535 if (isVarArg && IsWin64) {
3536 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3537 // shadow reg if callee is a varargs function.
3538 unsigned ShadowReg = 0;
3539 switch (VA.getLocReg()) {
3540 case X86::XMM0: ShadowReg = X86::RCX; break;
3541 case X86::XMM1: ShadowReg = X86::RDX; break;
3542 case X86::XMM2: ShadowReg = X86::R8; break;
3543 case X86::XMM3: ShadowReg = X86::R9; break;
3544 }
3545 if (ShadowReg)
3546 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3547 }
3548 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3549 assert(VA.isMemLoc());
3550 if (!StackPtr.getNode())
3551 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3552 getPointerTy(DAG.getDataLayout()));
3553 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3554 dl, DAG, VA, Flags));
3555 }
3556 }
3557
3558 if (!MemOpChains.empty())
3559 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3560
3561 if (Subtarget.isPICStyleGOT()) {
3562 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3563 // GOT pointer.
3564 if (!isTailCall) {
3565 RegsToPass.push_back(std::make_pair(
3566 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3567 getPointerTy(DAG.getDataLayout()))));
3568 } else {
3569 // If we are tail calling and generating PIC/GOT style code load the
3570 // address of the callee into ECX. The value in ecx is used as target of
3571 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3572 // for tail calls on PIC/GOT architectures. Normally we would just put the
3573 // address of GOT into ebx and then call target@PLT. But for tail calls
3574 // ebx would be restored (since ebx is callee saved) before jumping to the
3575 // target@PLT.
3576
3577 // Note: The actual moving to ECX is done further down.
3578 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3579 if (G && !G->getGlobal()->hasLocalLinkage() &&
3580 G->getGlobal()->hasDefaultVisibility())
3581 Callee = LowerGlobalAddress(Callee, DAG);
3582 else if (isa<ExternalSymbolSDNode>(Callee))
3583 Callee = LowerExternalSymbol(Callee, DAG);
3584 }
3585 }
3586
3587 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3588 // From AMD64 ABI document:
3589 // For calls that may call functions that use varargs or stdargs
3590 // (prototype-less calls or calls to functions containing ellipsis (...) in
3591 // the declaration) %al is used as hidden argument to specify the number
3592 // of SSE registers used. The contents of %al do not need to match exactly
3593      // the number of registers, but must be an upper bound on the number of SSE
3594 // registers used and is in the range 0 - 8 inclusive.
3595
3596 // Count the number of XMM registers allocated.
3597 static const MCPhysReg XMMArgRegs[] = {
3598 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3599 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3600 };
3601 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3602    assert((Subtarget.hasSSE1() || !NumXMMRegs)
3603           && "SSE registers cannot be used when SSE is disabled");
3604
3605 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3606 DAG.getConstant(NumXMMRegs, dl,
3607 MVT::i8)));
3608 }
3609
3610 if (isVarArg && IsMustTail) {
3611 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3612 for (const auto &F : Forwards) {
3613 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3614 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3615 }
3616 }
3617
3618 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3619 // don't need this because the eligibility check rejects calls that require
3620 // shuffling arguments passed in memory.
3621 if (!IsSibcall && isTailCall) {
3622 // Force all the incoming stack arguments to be loaded from the stack
3623 // before any new outgoing arguments are stored to the stack, because the
3624 // outgoing stack slots may alias the incoming argument stack slots, and
3625 // the alias isn't otherwise explicit. This is slightly more conservative
3626 // than necessary, because it means that each store effectively depends
3627 // on every argument instead of just those arguments it would clobber.
3628 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3629
3630 SmallVector<SDValue, 8> MemOpChains2;
3631 SDValue FIN;
3632 int FI = 0;
3633 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3634 ++I, ++OutsIndex) {
3635 CCValAssign &VA = ArgLocs[I];
3636
3637 if (VA.isRegLoc()) {
3638 if (VA.needsCustom()) {
3639          assert((CallConv == CallingConv::X86_RegCall) &&
3640                 "Expecting custom case only in regcall calling convention");
3641          // This means that we are in the special case where one argument was
3642          // passed in two register locations - skip the next location.
3643 ++I;
3644 }
3645
3646 continue;
3647 }
3648
3649      assert(VA.isMemLoc());
3650 SDValue Arg = OutVals[OutsIndex];
3651 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3652 // Skip inalloca arguments. They don't require any work.
3653 if (Flags.isInAlloca())
3654 continue;
3655 // Create frame index.
3656 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3657 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3658 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3659 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3660
3661 if (Flags.isByVal()) {
3662 // Copy relative to framepointer.
3663 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3664 if (!StackPtr.getNode())
3665 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3666 getPointerTy(DAG.getDataLayout()));
3667 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3668 StackPtr, Source);
3669
3670 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3671 ArgChain,
3672 Flags, DAG, dl));
3673 } else {
3674 // Store relative to framepointer.
3675 MemOpChains2.push_back(DAG.getStore(
3676 ArgChain, dl, Arg, FIN,
3677 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3678 }
3679 }
3680
3681 if (!MemOpChains2.empty())
3682 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3683
3684 // Store the return address to the appropriate stack slot.
3685 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3686 getPointerTy(DAG.getDataLayout()),
3687 RegInfo->getSlotSize(), FPDiff, dl);
3688 }
3689
3690 // Build a sequence of copy-to-reg nodes chained together with token chain
3691 // and flag operands which copy the outgoing args into registers.
3692 SDValue InFlag;
3693 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3694 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3695 RegsToPass[i].second, InFlag);
3696 InFlag = Chain.getValue(1);
3697 }
3698
3699 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3700    assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
3701 // In the 64-bit large code model, we have to make all calls
3702 // through a register, since the call instruction's 32-bit
3703 // pc-relative offset may not be large enough to hold the whole
3704 // address.
3705 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3706 // If the callee is a GlobalAddress node (quite common, every direct call
3707 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3708 // it.
3709 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3710
3711 // We should use extra load for direct calls to dllimported functions in
3712 // non-JIT mode.
3713 const GlobalValue *GV = G->getGlobal();
3714 if (!GV->hasDLLImportStorageClass()) {
3715 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
3716
3717 Callee = DAG.getTargetGlobalAddress(
3718 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3719
3720 if (OpFlags == X86II::MO_GOTPCREL) {
3721 // Add a wrapper.
3722 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3723 getPointerTy(DAG.getDataLayout()), Callee);
3724 // Add extra indirection
3725 Callee = DAG.getLoad(
3726 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3727 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3728 }
3729 }
3730 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3731 const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
3732 unsigned char OpFlags =
3733 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
3734
3735 Callee = DAG.getTargetExternalSymbol(
3736 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3737 } else if (Subtarget.isTarget64BitILP32() &&
3738 Callee->getValueType(0) == MVT::i32) {
3739 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3740 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3741 }
3742
3743 // Returns a chain & a flag for retval copy to use.
3744 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3745 SmallVector<SDValue, 8> Ops;
3746
3747 if (!IsSibcall && isTailCall) {
3748 Chain = DAG.getCALLSEQ_END(Chain,
3749 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3750 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3751 InFlag = Chain.getValue(1);
3752 }
3753
3754 Ops.push_back(Chain);
3755 Ops.push_back(Callee);
3756
3757 if (isTailCall)
3758 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3759
3760 // Add argument registers to the end of the list so that they are known live
3761 // into the call.
3762 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3763 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3764 RegsToPass[i].second.getValueType()));
3765
3766 // Add a register mask operand representing the call-preserved registers.
3767 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
3768  // use the X86_INTR calling convention because it has the same CSR mask
3769 // (same preserved registers).
3770 const uint32_t *Mask = RegInfo->getCallPreservedMask(
3771 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
3772  assert(Mask && "Missing call preserved mask for calling convention");
3773
3774 // If this is an invoke in a 32-bit function using a funclet-based
3775 // personality, assume the function clobbers all registers. If an exception
3776 // is thrown, the runtime will not restore CSRs.
3777 // FIXME: Model this more precisely so that we can register allocate across
3778 // the normal edge and spill and fill across the exceptional edge.
3779 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
3780 const Function *CallerFn = MF.getFunction();
3781 EHPersonality Pers =
3782 CallerFn->hasPersonalityFn()
3783 ? classifyEHPersonality(CallerFn->getPersonalityFn())
3784 : EHPersonality::Unknown;
3785 if (isFuncletEHPersonality(Pers))
3786 Mask = RegInfo->getNoPreservedMask();
3787 }
3788
3789 // Define a new register mask from the existing mask.
3790 uint32_t *RegMask = nullptr;
3791
3792 // In some calling conventions we need to remove the used physical registers
3793 // from the reg mask.
3794 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
3795 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3796
3797 // Allocate a new Reg Mask and copy Mask.
3798 RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
3799 unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
3800 memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
3801
3802 // Make sure all sub registers of the argument registers are reset
3803 // in the RegMask.
3804 for (auto const &RegPair : RegsToPass)
3805 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
3806 SubRegs.isValid(); ++SubRegs)
3807 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3808
3809 // Create the RegMask Operand according to our updated mask.
3810 Ops.push_back(DAG.getRegisterMask(RegMask));
3811 } else {
3812 // Create the RegMask Operand according to the static mask.
3813 Ops.push_back(DAG.getRegisterMask(Mask));
3814 }
3815
3816 if (InFlag.getNode())
3817 Ops.push_back(InFlag);
3818
3819 if (isTailCall) {
3820 // We used to do:
3821 //// If this is the first return lowered for this function, add the regs
3822 //// to the liveout set for the function.
3823 // This isn't right, although it's probably harmless on x86; liveouts
3824 // should be computed from returns not tail calls. Consider a void
3825 // function making a tail call to a function returning int.
3826 MF.getFrameInfo().setHasTailCall();
3827 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
3828 }
3829
3830 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3831 InFlag = Chain.getValue(1);
3832
3833 // Create the CALLSEQ_END node.
3834 unsigned NumBytesForCalleeToPop;
3835 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3836 DAG.getTarget().Options.GuaranteedTailCallOpt))
3837 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3838 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3839 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3840 SR == StackStructReturn)
3841 // If this is a call to a struct-return function, the callee
3842 // pops the hidden struct pointer, so we have to push it back.
3843 // This is common for Darwin/X86, Linux & Mingw32 targets.
3844 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3845 NumBytesForCalleeToPop = 4;
3846 else
3847 NumBytesForCalleeToPop = 0; // Callee pops nothing.
3848
3849 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
3850 // No need to reset the stack after the call if the call doesn't return. To
3851    // make the MI verifier happy, we'll pretend the callee does it for us.
3852 NumBytesForCalleeToPop = NumBytes;
3853 }
3854
3855 // Returns a flag for retval copy to use.
3856 if (!IsSibcall) {
3857 Chain = DAG.getCALLSEQ_END(Chain,
3858 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3859 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
3860 true),
3861 InFlag, dl);
3862 InFlag = Chain.getValue(1);
3863 }
3864
3865 // Handle result values, copying them out of physregs into vregs that we
3866 // return.
3867 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
3868 InVals, RegMask);
3869}
3870
3871//===----------------------------------------------------------------------===//
3872// Fast Calling Convention (tail call) implementation
3873//===----------------------------------------------------------------------===//
3874
3875// Like the StdCall convention, the callee cleans up the arguments, except that
3876// ECX is reserved for storing the tail-called function's address. Only 2 registers are
3877// free for argument passing (inreg). Tail call optimization is performed
3878// provided:
3879// * tailcallopt is enabled
3880// * caller/callee are fastcc
3881// On X86_64 architecture with GOT-style position independent code only local
3882// (within module) calls are supported at the moment.
3883// To keep the stack aligned according to the platform ABI, the function
3884// GetAlignedArgumentStackSize ensures that the argument delta is always a
3885// multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld for example)
3886// If a tail-called callee has more arguments than the caller, the caller
3887// needs to make sure that there is room to move the RETADDR to. This is
3888// achieved by reserving an area the size of the argument delta right after the
3889// original RETADDR, but before the saved frame pointer or the spilled registers,
3890// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
3891// stack layout:
3892// arg1
3893// arg2
3894// RETADDR
3895// [ new RETADDR
3896// move area ]
3897// (possible EBP)
3898// ESI
3899// EDI
3900// local1 ..
3901
3902/// Round the stack size up so that, e.g., it has the form 16n + 12 for a
3903/// 16-byte alignment requirement, so the stack stays aligned once the return
/// address slot is pushed.
3904unsigned
3905X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
3906 SelectionDAG& DAG) const {
3907 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3908 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3909 unsigned StackAlignment = TFI.getStackAlignment();
3910 uint64_t AlignMask = StackAlignment - 1;
3911 int64_t Offset = StackSize;
3912 unsigned SlotSize = RegInfo->getSlotSize();
3913 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
3914    // The low bits are at most StackAlignment - SlotSize (12 here), so just add the difference.
3915 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
3916 } else {
3917    // Mask out the lower bits, then add the stack alignment plus StackAlignment - SlotSize (12 here).
3918 Offset = ((~AlignMask) & Offset) + StackAlignment +
3919 (StackAlignment-SlotSize);
3920 }
3921 return Offset;
3922}
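
// A minimal standalone sketch of the alignment rule implemented by
// GetAlignedArgumentStackSize above: the result is the smallest value greater
// than or equal to StackSize whose remainder modulo the stack alignment is
// StackAlignment - SlotSize, so that pushing the return address (SlotSize
// bytes) brings the stack back onto an alignment boundary. Plain C++ with an
// assumed 16-byte alignment and 4-byte slots as on 32-bit x86; not LLVM API.
#include <cassert>
#include <cstdint>

static uint64_t alignedArgStackSize(uint64_t StackSize, uint64_t StackAlignment,
                                    uint64_t SlotSize) {
  uint64_t AlignMask = StackAlignment - 1;
  uint64_t Offset = StackSize;
  if ((Offset & AlignMask) <= (StackAlignment - SlotSize))
    Offset += (StackAlignment - SlotSize) - (Offset & AlignMask);
  else
    Offset = ((~AlignMask) & Offset) + StackAlignment + (StackAlignment - SlotSize);
  return Offset;
}

static void alignedArgStackSizeExamples() {
  // 20 bytes of arguments become 28 (28 + 4 == 32, a multiple of 16),
  // and 29 bytes become 44 (44 + 4 == 48).
  assert(alignedArgStackSize(20, 16, 4) == 28);
  assert(alignedArgStackSize(29, 16, 4) == 44);
}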
3923
3924/// Return true if the given stack call argument is already available in the
3925/// same position (relatively) of the caller's incoming argument stack.
3926static
3927bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
3928 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
3929 const X86InstrInfo *TII, const CCValAssign &VA) {
3930 unsigned Bytes = Arg.getValueSizeInBits() / 8;
3931
3932 for (;;) {
3933 // Look through nodes that don't alter the bits of the incoming value.
3934 unsigned Op = Arg.getOpcode();
3935 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
3936 Arg = Arg.getOperand(0);
3937 continue;
3938 }
3939 if (Op == ISD::TRUNCATE) {
3940 const SDValue &TruncInput = Arg.getOperand(0);
3941 if (TruncInput.getOpcode() == ISD::AssertZext &&
3942 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
3943 Arg.getValueType()) {
3944 Arg = TruncInput.getOperand(0);
3945 continue;
3946 }
3947 }
3948 break;
3949 }
3950
3951  int FI = INT_MAX;
3952 if (Arg.getOpcode() == ISD::CopyFromReg) {
3953 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
3954 if (!TargetRegisterInfo::isVirtualRegister(VR))
3955 return false;
3956 MachineInstr *Def = MRI->getVRegDef(VR);
3957 if (!Def)
3958 return false;
3959 if (!Flags.isByVal()) {
3960 if (!TII->isLoadFromStackSlot(*Def, FI))
3961 return false;
3962 } else {
3963 unsigned Opcode = Def->getOpcode();
3964 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
3965 Opcode == X86::LEA64_32r) &&
3966 Def->getOperand(1).isFI()) {
3967 FI = Def->getOperand(1).getIndex();
3968 Bytes = Flags.getByValSize();
3969 } else
3970 return false;
3971 }
3972 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
3973 if (Flags.isByVal())
3974 // ByVal argument is passed in as a pointer but it's now being
3975 // dereferenced. e.g.
3976 // define @foo(%struct.X* %A) {
3977 // tail call @bar(%struct.X* byval %A)
3978 // }
3979 return false;
3980 SDValue Ptr = Ld->getBasePtr();
3981 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
3982 if (!FINode)
3983 return false;
3984 FI = FINode->getIndex();
3985 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
3986 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
3987 FI = FINode->getIndex();
3988 Bytes = Flags.getByValSize();
3989 } else
3990 return false;
3991
3992  assert(FI != INT_MAX);
3993 if (!MFI.isFixedObjectIndex(FI))
3994 return false;
3995
3996 if (Offset != MFI.getObjectOffset(FI))
3997 return false;
3998
3999 // If this is not byval, check that the argument stack object is immutable.
4000 // inalloca and argument copy elision can create mutable argument stack
4001 // objects. Byval objects can be mutated, but a byval call intends to pass the
4002 // mutated memory.
4003 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4004 return false;
4005
4006 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4007 // If the argument location is wider than the argument type, check that any
4008 // extension flags match.
4009 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4010 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4011 return false;
4012 }
4013 }
4014
4015 return Bytes == MFI.getObjectSize(FI);
4016}
4017
4018/// Check whether the call is eligible for tail call optimization. Targets
4019/// that want to do tail call optimization should implement this function.
4020bool X86TargetLowering::IsEligibleForTailCallOptimization(
4021 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4022 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4023 const SmallVectorImpl<ISD::OutputArg> &Outs,
4024 const SmallVectorImpl<SDValue> &OutVals,
4025 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4026 if (!mayTailCallThisCC(CalleeCC))
4027 return false;
4028
4029 // If -tailcallopt is specified, make fastcc functions tail-callable.
4030 MachineFunction &MF = DAG.getMachineFunction();
4031 const Function *CallerF = MF.getFunction();
4032
4033 // If the function return type is x86_fp80 and the callee return type is not,
4034 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4035 // perform a tailcall optimization here.
4036 if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4037 return false;
4038
4039 CallingConv::ID CallerCC = CallerF->getCallingConv();
4040 bool CCMatch = CallerCC == CalleeCC;
4041 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4042 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4043
4044 // Win64 functions have extra shadow space for argument homing. Don't do the
4045 // sibcall if the caller and callee have mismatched expectations for this
4046 // space.
4047 if (IsCalleeWin64 != IsCallerWin64)
4048 return false;
4049
4050 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4051 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4052 return true;
4053 return false;
4054 }
4055
4056 // Look for obvious safe cases to perform tail call optimization that do not
4057 // require ABI changes. This is what gcc calls sibcall.
4058
4059 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4060 // emit a special epilogue.
4061 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4062 if (RegInfo->needsStackRealignment(MF))
4063 return false;
4064
4065 // Also avoid sibcall optimization if either caller or callee uses struct
4066 // return semantics.
4067 if (isCalleeStructRet || isCallerStructRet)
4068 return false;
4069
4070 // Do not sibcall optimize vararg calls unless all arguments are passed via
4071 // registers.
4072 LLVMContext &C = *DAG.getContext();
4073 if (isVarArg && !Outs.empty()) {
4074 // Optimizing for varargs on Win64 is unlikely to be safe without
4075 // additional testing.
4076 if (IsCalleeWin64 || IsCallerWin64)
4077 return false;
4078
4079 SmallVector<CCValAssign, 16> ArgLocs;
4080 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4081
4082 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4083 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4084 if (!ArgLocs[i].isRegLoc())
4085 return false;
4086 }
4087
4088 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4089 // stack. Therefore, if it's not used by the call it is not safe to optimize
4090 // this into a sibcall.
4091 bool Unused = false;
4092 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4093 if (!Ins[i].Used) {
4094 Unused = true;
4095 break;
4096 }
4097 }
4098 if (Unused) {
4099 SmallVector<CCValAssign, 16> RVLocs;
4100 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4101 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4102 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4103 CCValAssign &VA = RVLocs[i];
4104 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4105 return false;
4106 }
4107 }
4108
4109 // Check that the call results are passed in the same way.
4110 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4111 RetCC_X86, RetCC_X86))
4112 return false;
4113 // The callee has to preserve all registers the caller needs to preserve.
4114 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4115 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4116 if (!CCMatch) {
4117 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4118 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4119 return false;
4120 }
4121
4122 unsigned StackArgsSize = 0;
4123
4124 // If the callee takes no arguments then go on to check the results of the
4125 // call.
4126 if (!Outs.empty()) {
4127 // Check if stack adjustment is needed. For now, do not do this if any
4128 // argument is passed on the stack.
4129 SmallVector<CCValAssign, 16> ArgLocs;
4130 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4131
4132 // Allocate shadow area for Win64
4133 if (IsCalleeWin64)
4134 CCInfo.AllocateStack(32, 8);
4135
4136 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4137 StackArgsSize = CCInfo.getNextStackOffset();
4138
4139 if (CCInfo.getNextStackOffset()) {
4140 // Check if the arguments are already laid out in the right way as
4141 // the caller's fixed stack objects.
4142 MachineFrameInfo &MFI = MF.getFrameInfo();
4143 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4144 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4145 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4146 CCValAssign &VA = ArgLocs[i];
4147 SDValue Arg = OutVals[i];
4148 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4149 if (VA.getLocInfo() == CCValAssign::Indirect)
4150 return false;
4151 if (!VA.isRegLoc()) {
4152 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4153 MFI, MRI, TII, VA))
4154 return false;
4155 }
4156 }
4157 }
4158
4159 bool PositionIndependent = isPositionIndependent();
4160 // If the tailcall address may be in a register, then make sure it's
4161 // possible to register allocate for it. In 32-bit, the call address can
4162 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4163 // callee-saved registers are restored. These happen to be the same
4164 // registers used to pass 'inreg' arguments so watch out for those.
4165 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4166 !isa<ExternalSymbolSDNode>(Callee)) ||
4167 PositionIndependent)) {
4168 unsigned NumInRegs = 0;
4169 // In PIC we need an extra register to formulate the address computation
4170 // for the callee.
4171 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4172
4173 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4174 CCValAssign &VA = ArgLocs[i];
4175 if (!VA.isRegLoc())
4176 continue;
4177 unsigned Reg = VA.getLocReg();
4178 switch (Reg) {
4179 default: break;
4180 case X86::EAX: case X86::EDX: case X86::ECX:
4181 if (++NumInRegs == MaxInRegs)
4182 return false;
4183 break;
4184 }
4185 }
4186 }
4187
4188 const MachineRegisterInfo &MRI = MF.getRegInfo();
4189 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4190 return false;
4191 }
4192
4193 bool CalleeWillPop =
4194 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4195 MF.getTarget().Options.GuaranteedTailCallOpt);
4196
4197 if (unsigned BytesToPop =
4198 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4199 // If we have bytes to pop, the callee must pop them.
4200 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4201 if (!CalleePopMatches)
4202 return false;
4203 } else if (CalleeWillPop && StackArgsSize > 0) {
4204 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4205 return false;
4206 }
4207
4208 return true;
4209}
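
// A standalone restatement of the callee-pop compatibility check at the end of
// IsEligibleForTailCallOptimization: the tail call reuses the caller's return,
// so whatever the caller's own return is expected to pop must match exactly
// what the callee will pop. Plain C++ sketch with hypothetical parameter
// names; not LLVM API.
static bool calleePopCompatible(bool CalleeWillPop, unsigned CalleeStackArgsSize,
                                unsigned CallerBytesToPopOnReturn) {
  if (CallerBytesToPopOnReturn != 0)
    // The caller's return pops bytes, so the callee must pop the same amount.
    return CalleeWillPop && CalleeStackArgsSize == CallerBytesToPopOnReturn;
  // The caller's return pops nothing, so the callee must not pop anything.
  return !(CalleeWillPop && CalleeStackArgsSize > 0);
}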
4210
4211FastISel *
4212X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4213 const TargetLibraryInfo *libInfo) const {
4214 return X86::createFastISel(funcInfo, libInfo);
4215}
4216
4217//===----------------------------------------------------------------------===//
4218// Other Lowering Hooks
4219//===----------------------------------------------------------------------===//
4220
4221static bool MayFoldLoad(SDValue Op) {
4222 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4223}
4224
4225static bool MayFoldIntoStore(SDValue Op) {
4226 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4227}
4228
4229static bool MayFoldIntoZeroExtend(SDValue Op) {
4230 if (Op.hasOneUse()) {
4231 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4232 return (ISD::ZERO_EXTEND == Opcode);
4233 }
4234 return false;
4235}
4236
4237static bool isTargetShuffle(unsigned Opcode) {
4238 switch(Opcode) {
4239 default: return false;
4240 case X86ISD::BLENDI:
4241 case X86ISD::PSHUFB:
4242 case X86ISD::PSHUFD:
4243 case X86ISD::PSHUFHW:
4244 case X86ISD::PSHUFLW:
4245 case X86ISD::SHUFP:
4246 case X86ISD::INSERTPS:
4247 case X86ISD::EXTRQI:
4248 case X86ISD::INSERTQI:
4249 case X86ISD::PALIGNR:
4250 case X86ISD::VSHLDQ:
4251 case X86ISD::VSRLDQ:
4252 case X86ISD::MOVLHPS:
4253 case X86ISD::MOVHLPS:
4254 case X86ISD::MOVLPS:
4255 case X86ISD::MOVLPD:
4256 case X86ISD::MOVSHDUP:
4257 case X86ISD::MOVSLDUP:
4258 case X86ISD::MOVDDUP:
4259 case X86ISD::MOVSS:
4260 case X86ISD::MOVSD:
4261 case X86ISD::UNPCKL:
4262 case X86ISD::UNPCKH:
4263 case X86ISD::VBROADCAST:
4264 case X86ISD::VPERMILPI:
4265 case X86ISD::VPERMILPV:
4266 case X86ISD::VPERM2X128:
4267 case X86ISD::VPERMIL2:
4268 case X86ISD::VPERMI:
4269 case X86ISD::VPPERM:
4270 case X86ISD::VPERMV:
4271 case X86ISD::VPERMV3:
4272 case X86ISD::VPERMIV3:
4273 case X86ISD::VZEXT_MOVL:
4274 return true;
4275 }
4276}
4277
4278static bool isTargetShuffleVariableMask(unsigned Opcode) {
4279 switch (Opcode) {
4280 default: return false;
4281 // Target Shuffles.
4282 case X86ISD::PSHUFB:
4283 case X86ISD::VPERMILPV:
4284 case X86ISD::VPERMIL2:
4285 case X86ISD::VPPERM:
4286 case X86ISD::VPERMV:
4287 case X86ISD::VPERMV3:
4288 case X86ISD::VPERMIV3:
4289 return true;
4290 // 'Faux' Target Shuffles.
4291 case ISD::AND:
4292 case X86ISD::ANDNP:
4293 return true;
4294 }
4295}
4296
4297SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4298 MachineFunction &MF = DAG.getMachineFunction();
4299 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4300 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4301 int ReturnAddrIndex = FuncInfo->getRAIndex();
4302
4303 if (ReturnAddrIndex == 0) {
4304 // Set up a frame object for the return address.
4305 unsigned SlotSize = RegInfo->getSlotSize();
4306 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4307 -(int64_t)SlotSize,
4308 false);
4309 FuncInfo->setRAIndex(ReturnAddrIndex);
4310 }
4311
4312 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4313}
4314
4315bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4316 bool hasSymbolicDisplacement) {
4317 // Offset should fit into 32 bit immediate field.
4318 if (!isInt<32>(Offset))
4319 return false;
4320
4321 // If we don't have a symbolic displacement - we don't have any extra
4322 // restrictions.
4323 if (!hasSymbolicDisplacement)
4324 return true;
4325
4326 // FIXME: Some tweaks might be needed for medium code model.
4327 if (M != CodeModel::Small && M != CodeModel::Kernel)
4328 return false;
4329
4330  // For the small code model we assume that the latest object is 16MB below the
4331  // end of the 31-bit address boundary. We may also accept pretty large negative
4332  // constants, knowing that all objects are in the positive half of the address space.
4333 if (M == CodeModel::Small && Offset < 16*1024*1024)
4334 return true;
4335
4336  // For the kernel code model we know that all objects reside in the negative
4337  // half of the 32-bit address space. We may not accept negative offsets, since
4338  // they may be just out of range, but we may accept pretty large positive ones.
4339 if (M == CodeModel::Kernel && Offset >= 0)
4340 return true;
4341
4342 return false;
4343}
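
// A standalone sketch mirroring the three checks in
// isOffsetSuitableForCodeModel, with a few example offsets; plain C++ under
// the same constants as above, not the LLVM entry point itself.
#include <cassert>
#include <cstdint>

enum class Model { Small, Kernel, Other };

static bool offsetSuitable(int64_t Offset, Model M, bool HasSymbolicDisp) {
  if (Offset != (int64_t)(int32_t)Offset)   // must fit a signed 32-bit field
    return false;
  if (!HasSymbolicDisp)                     // no extra restrictions
    return true;
  if (M != Model::Small && M != Model::Kernel)
    return false;
  if (M == Model::Small && Offset < 16 * 1024 * 1024)
    return true;                            // small: stay below the 16MB cushion
  if (M == Model::Kernel && Offset >= 0)
    return true;                            // kernel: only non-negative offsets,
                                            // since objects sit in the negative half
  return false;
}

static void offsetSuitableExamples() {
  assert(offsetSuitable(-64, Model::Small, true));         // small negative is fine
  assert(!offsetSuitable(64 << 20, Model::Small, true));   // 64MB is past the cushion
  assert(!offsetSuitable(-8, Model::Kernel, true));        // kernel rejects negatives
  assert(!offsetSuitable(int64_t(1) << 33, Model::Other, false)); // overflows 32 bits
}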
4344
4345/// Determines whether the callee is required to pop its own arguments.
4346/// Callee pop is necessary to support tail calls.
4347bool X86::isCalleePop(CallingConv::ID CallingConv,
4348 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4349 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4350 // can guarantee TCO.
4351 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4352 return true;
4353
4354 switch (CallingConv) {
4355 default:
4356 return false;
4357 case CallingConv::X86_StdCall:
4358 case CallingConv::X86_FastCall:
4359 case CallingConv::X86_ThisCall:
4360 case CallingConv::X86_VectorCall:
4361 return !is64Bit;
4362 }
4363}
4364
4365/// \brief Return true if the condition is an unsigned comparison operation.
4366static bool isX86CCUnsigned(unsigned X86CC) {
4367 switch (X86CC) {
4368 default:
4369    llvm_unreachable("Invalid integer condition!");
4370 case X86::COND_E:
4371 case X86::COND_NE:
4372 case X86::COND_B:
4373 case X86::COND_A:
4374 case X86::COND_BE:
4375 case X86::COND_AE:
4376 return true;
4377 case X86::COND_G:
4378 case X86::COND_GE:
4379 case X86::COND_L:
4380 case X86::COND_LE:
4381 return false;
4382 }
4383}
4384
4385static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4386 switch (SetCCOpcode) {
4387  default: llvm_unreachable("Invalid integer condition!");
4388 case ISD::SETEQ: return X86::COND_E;
4389 case ISD::SETGT: return X86::COND_G;
4390 case ISD::SETGE: return X86::COND_GE;
4391 case ISD::SETLT: return X86::COND_L;
4392 case ISD::SETLE: return X86::COND_LE;
4393 case ISD::SETNE: return X86::COND_NE;
4394 case ISD::SETULT: return X86::COND_B;
4395 case ISD::SETUGT: return X86::COND_A;
4396 case ISD::SETULE: return X86::COND_BE;
4397 case ISD::SETUGE: return X86::COND_AE;
4398 }
4399}
4400
4401/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4402/// condition code, returning the condition code and the LHS/RHS of the
4403/// comparison to make.
4404static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4405 bool isFP, SDValue &LHS, SDValue &RHS,
4406 SelectionDAG &DAG) {
4407 if (!isFP) {
4408 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4409 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4410 // X > -1 -> X == 0, jump !sign.
4411 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4412 return X86::COND_NS;
4413 }
4414 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4415 // X < 0 -> X == 0, jump on sign.
4416 return X86::COND_S;
4417 }
4418 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4419 // X < 1 -> X <= 0
4420 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4421 return X86::COND_LE;
4422 }
4423 }
4424
4425 return TranslateIntegerX86CC(SetCCOpcode);
4426 }
4427
4428 // First determine if it is required or is profitable to flip the operands.
4429
4430 // If LHS is a foldable load, but RHS is not, flip the condition.
4431 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4432 !ISD::isNON_EXTLoad(RHS.getNode())) {
4433 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4434 std::swap(LHS, RHS);
4435 }
4436
4437 switch (SetCCOpcode) {
4438 default: break;
4439 case ISD::SETOLT:
4440 case ISD::SETOLE:
4441 case ISD::SETUGT:
4442 case ISD::SETUGE:
4443 std::swap(LHS, RHS);
4444 break;
4445 }
4446
4447 // On a floating point condition, the flags are set as follows:
4448 // ZF PF CF op
4449 // 0 | 0 | 0 | X > Y
4450 // 0 | 0 | 1 | X < Y
4451 // 1 | 0 | 0 | X == Y
4452 // 1 | 1 | 1 | unordered
4453 switch (SetCCOpcode) {
4454  default: llvm_unreachable("Condcode should be pre-legalized away");
4455 case ISD::SETUEQ:
4456 case ISD::SETEQ: return X86::COND_E;
4457 case ISD::SETOLT: // flipped
4458 case ISD::SETOGT:
4459 case ISD::SETGT: return X86::COND_A;
4460 case ISD::SETOLE: // flipped
4461 case ISD::SETOGE:
4462 case ISD::SETGE: return X86::COND_AE;
4463 case ISD::SETUGT: // flipped
4464 case ISD::SETULT:
4465 case ISD::SETLT: return X86::COND_B;
4466 case ISD::SETUGE: // flipped
4467 case ISD::SETULE:
4468 case ISD::SETLE: return X86::COND_BE;
4469 case ISD::SETONE:
4470 case ISD::SETNE: return X86::COND_NE;
4471 case ISD::SETUO: return X86::COND_P;
4472 case ISD::SETO: return X86::COND_NP;
4473 case ISD::SETOEQ:
4474 case ISD::SETUNE: return X86::COND_INVALID;
4475 }
4476}
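
// A small standalone simulation of the flag table above. It models the
// ZF/PF/CF result of a ucomiss-style compare and shows why the ordered
// "less than" case is lowered by swapping the operands and using COND_A
// (false on unordered inputs) instead of testing COND_B directly (true on
// unordered inputs). Plain C++ sketch following the table, not LLVM API.
#include <cassert>
#include <cmath>

struct FPFlags { bool ZF, PF, CF; };

static FPFlags fpCompareFlags(float X, float Y) {  // models "ucomiss X, Y"
  if (std::isnan(X) || std::isnan(Y))
    return {true, true, true};                     // unordered
  if (X == Y)
    return {true, false, false};
  if (X < Y)
    return {false, false, true};
  return {false, false, false};                    // X > Y
}

static bool condA(FPFlags F) { return !F.CF && !F.ZF; } // COND_A: strictly above
static bool condB(FPFlags F) { return F.CF; }           // COND_B: below or unordered

static void orderedLessThanExample() {
  float QNaN = std::nanf("");
  // SETOLT(x, y): swap the operands and test COND_A on compare(y, x).
  assert(condA(fpCompareFlags(2.0f, 1.0f)));  // 1.0 < 2.0 holds
  assert(!condA(fpCompareFlags(QNaN, 1.0f))); // false on unordered, as required
  // Testing COND_B on compare(x, y) directly would wrongly accept a NaN:
  assert(condB(fpCompareFlags(QNaN, 1.0f)));
}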
4477
4478/// Is there a floating point cmov for the specific X86 condition code?
4479/// The current x86 ISA includes the following FP cmov instructions:
4480/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4481static bool hasFPCMov(unsigned X86CC) {
4482 switch (X86CC) {
4483 default:
4484 return false;
4485 case X86::COND_B:
4486 case X86::COND_BE:
4487 case X86::COND_E:
4488 case X86::COND_P:
4489 case X86::COND_A:
4490 case X86::COND_AE:
4491 case X86::COND_NE:
4492 case X86::COND_NP:
4493 return true;
4494 }
4495}
4496
4497
4498bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4499 const CallInst &I,
4500 unsigned Intrinsic) const {
4501
4502 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4503 if (!IntrData)
4504 return false;
4505
4506 Info.opc = ISD::INTRINSIC_W_CHAIN;
4507 Info.readMem = false;
4508 Info.writeMem = false;
4509 Info.vol = false;
4510 Info.offset = 0;
4511
4512 switch (IntrData->Type) {
4513 case EXPAND_FROM_MEM: {
4514 Info.ptrVal = I.getArgOperand(0);
4515 Info.memVT = MVT::getVT(I.getType());
4516 Info.align = 1;
4517 Info.readMem = true;
4518 break;
4519 }
4520 case COMPRESS_TO_MEM: {
4521 Info.ptrVal = I.getArgOperand(0);
4522 Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
4523 Info.align = 1;
4524 Info.writeMem = true;
4525 break;
4526 }
4527 case TRUNCATE_TO_MEM_VI8:
4528 case TRUNCATE_TO_MEM_VI16:
4529 case TRUNCATE_TO_MEM_VI32: {
4530 Info.ptrVal = I.getArgOperand(0);
4531 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4532 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4533 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4534 ScalarVT = MVT::i8;
4535 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4536 ScalarVT = MVT::i16;
4537 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4538 ScalarVT = MVT::i32;
4539
4540 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4541 Info.align = 1;
4542 Info.writeMem = true;
4543 break;
4544 }
4545 default:
4546 return false;
4547 }
4548
4549 return true;
4550}
4551
4552/// Returns true if the target can instruction select the
4553/// specified FP immediate natively. If false, the legalizer will
4554/// materialize the FP immediate as a load from a constant pool.
4555bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4556 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4557 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4558 return true;
4559 }
4560 return false;
4561}
4562
4563bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4564 ISD::LoadExtType ExtTy,
4565 EVT NewVT) const {
4566 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4567  // relocation targets a movq or addq instruction: don't let the load shrink.
4568 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4569 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4570 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4571 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4572 return true;
4573}
4574
4575/// \brief Returns true if it is beneficial to convert a load of a constant
4576/// to just the constant itself.
4577bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4578 Type *Ty) const {
4579  assert(Ty->isIntegerTy());
4580
4581 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4582 if (BitSize == 0 || BitSize > 64)
4583 return false;
4584 return true;
4585}
4586
4587bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4588 // TODO: It might be a win to ease or lift this restriction, but the generic
4589 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4590 if (VT.isVector() && Subtarget.hasAVX512())
4591 return false;
4592
4593 return true;
4594}
4595
4596bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4597 unsigned Index) const {
4598 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4599 return false;
4600
4601 // Mask vectors support all subregister combinations and operations that
4602  // extract half of a vector.
4603 if (ResVT.getVectorElementType() == MVT::i1)
4604 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4605 (Index == ResVT.getVectorNumElements()));
4606
4607 return (Index % ResVT.getVectorNumElements()) == 0;
4608}
4609
4610bool X86TargetLowering::isCheapToSpeculateCttz() const {
4611 // Speculate cttz only if we can directly use TZCNT.
4612 return Subtarget.hasBMI();
4613}
4614
4615bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4616 // Speculate ctlz only if we can directly use LZCNT.
4617 return Subtarget.hasLZCNT();
4618}
4619
4620bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
4621 const SelectionDAG &DAG) const {
4622  // Do not merge to a float value size (128 bits) if the
4623  // NoImplicitFloat attribute is set.
4624 bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute(
4625 Attribute::NoImplicitFloat);
4626
4627 if (NoFloat) {
4628 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
4629 return (MemVT.getSizeInBits() <= MaxIntSize);
4630 }
4631 return true;
4632}
4633
4634bool X86TargetLowering::isCtlzFast() const {
4635 return Subtarget.hasFastLZCNT();
4636}
4637
4638bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4639 const Instruction &AndI) const {
4640 return true;
4641}
4642
4643bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4644 if (!Subtarget.hasBMI())
4645 return false;
4646
4647 // There are only 32-bit and 64-bit forms for 'andn'.
4648 EVT VT = Y.getValueType();
4649 if (VT != MVT::i32 && VT != MVT::i64)
4650 return false;
4651
4652 return true;
4653}
4654
4655MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
4656 MVT VT = MVT::getIntegerVT(NumBits);
4657 if (isTypeLegal(VT))
4658 return VT;
4659
4660 // PMOVMSKB can handle this.
4661 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
4662 return MVT::v16i8;
4663
4664 // VPMOVMSKB can handle this.
4665 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
4666 return MVT::v32i8;
4667
4668 // TODO: Allow 64-bit type for 32-bit target.
4669 // TODO: 512-bit types should be allowed, but make sure that those
4670 // cases are handled in combineVectorSizedSetCCEquality().
4671
4672 return MVT::INVALID_SIMPLE_VALUE_TYPE;
4673}
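
// A minimal illustration of the 128-bit case selected above: a 16-byte
// equality test can be done with one vector byte-compare plus PMOVMSKB
// instead of a chain of scalar compares. SSE2 intrinsics sketch, assuming
// both pointers are readable for 16 bytes; illustrative only, not the code
// the backend itself emits here.
#include <emmintrin.h>

static bool equal16Bytes(const void *A, const void *B) {
  __m128i VA = _mm_loadu_si128(static_cast<const __m128i *>(A));
  __m128i VB = _mm_loadu_si128(static_cast<const __m128i *>(B));
  __m128i Eq = _mm_cmpeq_epi8(VA, VB);      // 0xFF in every lane whose bytes match
  return _mm_movemask_epi8(Eq) == 0xFFFF;   // all 16 lanes matched
}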
4674
4675/// Val is the undef sentinel value or equal to the specified value.
4676static bool isUndefOrEqual(int Val, int CmpVal) {
4677 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
4678}
4679
4680/// Val is either the undef or zero sentinel value.
4681static bool isUndefOrZero(int Val) {
4682 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
4683}
4684
4685/// Return true if every element in Mask, beginning
4686/// from position Pos and ending in Pos+Size is the undef sentinel value.
4687static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
4688 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4689 if (Mask[i] != SM_SentinelUndef)
4690 return false;
4691 return true;
4692}
4693
4694/// Return true if Val is undef or if its value falls within the
4695/// specified range [Low, Hi).
4696static bool isUndefOrInRange(int Val, int Low, int Hi) {
4697 return (Val == SM_SentinelUndef) || (Val >= Low && Val < Hi);
4698}
4699
4700/// Return true if every element in Mask is undef or if its value
4701/// falls within the specified range [Low, Hi).
4702static bool isUndefOrInRange(ArrayRef<int> Mask,
4703 int Low, int Hi) {
4704 for (int M : Mask)
4705 if (!isUndefOrInRange(M, Low, Hi))
4706 return false;
4707 return true;
4708}
4709
4710/// Return true if Val is undef, zero or if its value falls within the
4711/// specified range [Low, Hi).
4712static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
4713 return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
4714}
4715
4716/// Return true if every element in Mask is undef, zero or if its value
4717/// falls within the specified range [Low, Hi).
4718static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
4719 for (int M : Mask)
4720 if (!isUndefOrZeroOrInRange(M, Low, Hi))
4721 return false;
4722 return true;
4723}
4724
4725/// Return true if every element in Mask, beginning
4726/// from position Pos and ending in Pos+Size, falls within the specified
4727/// sequential range [Low, Low+Size), or is undef.
4728static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
4729 unsigned Pos, unsigned Size, int Low) {
4730 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
4731 if (!isUndefOrEqual(Mask[i], Low))
4732 return false;
4733 return true;
4734}
4735
4736/// Return true if every element in Mask, beginning
4737/// from position Pos and ending in Pos+Size, falls within the specified
4738/// sequential range [Low, Low+Size), or is undef or is zero.
4739static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4740 unsigned Size, int Low) {
4741 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
4742 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
4743 return false;
4744 return true;
4745}
4746
4747/// Return true if every element in Mask, beginning
4748/// from position Pos and ending in Pos+Size is undef or is zero.
4749static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4750 unsigned Size) {
4751 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4752 if (!isUndefOrZero(Mask[i]))
4753 return false;
4754 return true;
4755}
4756
4757/// \brief Helper function to test whether a shuffle mask could be
4758/// simplified by widening the elements being shuffled.
4759///
4760/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
4761/// leaves it in an unspecified state.
4762///
4763/// NOTE: This must handle normal vector shuffle masks and *target* vector
4764/// shuffle masks. The latter have the special property of a '-2' representing
4765/// a zero-ed lane of a vector.
4766static bool canWidenShuffleElements(ArrayRef<int> Mask,
4767 SmallVectorImpl<int> &WidenedMask) {
4768 WidenedMask.assign(Mask.size() / 2, 0);
4769 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
4770 int M0 = Mask[i];
4771 int M1 = Mask[i + 1];
4772
4773    // If both elements are undef, it's trivial.
4774 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
4775 WidenedMask[i / 2] = SM_SentinelUndef;
4776 continue;
4777 }
4778
4779 // Check for an undef mask and a mask value properly aligned to fit with
4780 // a pair of values. If we find such a case, use the non-undef mask's value.
4781 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
4782 WidenedMask[i / 2] = M1 / 2;
4783 continue;
4784 }
4785 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
4786 WidenedMask[i / 2] = M0 / 2;
4787 continue;
4788 }
4789
4790 // When zeroing, we need to spread the zeroing across both lanes to widen.
4791 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
4792 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
4793 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
4794 WidenedMask[i / 2] = SM_SentinelZero;
4795 continue;
4796 }
4797 return false;
4798 }
4799
4800 // Finally check if the two mask values are adjacent and aligned with
4801 // a pair.
4802 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
4803 WidenedMask[i / 2] = M0 / 2;
4804 continue;
4805 }
4806
4807 // Otherwise we can't safely widen the elements used in this shuffle.
4808 return false;
4809 }
4810  assert(WidenedMask.size() == Mask.size() / 2 &&
4811         "Incorrect size of mask after widening the elements!");
4812
4813 return true;
4814}
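
// A standalone sketch of the widening rule above using plain ints and the
// same sentinels (-1 for undef, -2 for zero): a pair of adjacent mask entries
// widens only if it is all-undef, all-zero/undef, or an aligned pair of
// adjacent element indices. Not the LLVM helper itself.
#include <vector>

namespace widen_sketch {
const int Undef = -1, Zero = -2;

bool widenShuffleMask(const std::vector<int> &Mask, std::vector<int> &Widened) {
  Widened.assign(Mask.size() / 2, 0);
  for (size_t i = 0; i < Mask.size(); i += 2) {
    int M0 = Mask[i], M1 = Mask[i + 1];
    if (M0 == Undef && M1 == Undef) { Widened[i / 2] = Undef; continue; }
    if (M0 == Undef && M1 >= 0 && (M1 % 2) == 1) { Widened[i / 2] = M1 / 2; continue; }
    if (M1 == Undef && M0 >= 0 && (M0 % 2) == 0) { Widened[i / 2] = M0 / 2; continue; }
    if (M0 == Zero || M1 == Zero) {
      if ((M0 == Zero || M0 == Undef) && (M1 == Zero || M1 == Undef)) {
        Widened[i / 2] = Zero;              // zeroing must cover both halves
        continue;
      }
      return false;
    }
    if (M0 >= 0 && (M0 % 2) == 0 && M0 + 1 == M1) {
      Widened[i / 2] = M0 / 2;              // adjacent, aligned pair
      continue;
    }
    return false;                           // halves cannot form a wider element
  }
  return true;
}
} // namespace widen_sketch

// E.g. a v8i16 mask {2,3, -1,-1, 6,7, -2,-2} widens to the v4i32 mask
// {1, Undef, 3, Zero}, while a mask starting {0,2, ...} does not widen.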
4815
4816/// Returns true if Elt is a constant zero or a floating point constant +0.0.
4817bool X86::isZeroNode(SDValue Elt) {
4818 return isNullConstant(Elt) || isNullFPConstant(Elt);
4819}
4820
4821// Build a vector of constants.
4822// Use an UNDEF node if MaskElt == -1.
4823// Split 64-bit constants in the 32-bit mode.
4824static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
4825 const SDLoc &dl, bool IsMask = false) {
4826
4827 SmallVector<SDValue, 32> Ops;
4828 bool Split = false;
4829
4830 MVT ConstVecVT = VT;
4831 unsigned NumElts = VT.getVectorNumElements();
4832 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4833 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4834 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4835 Split = true;
4836 }
4837
4838 MVT EltVT = ConstVecVT.getVectorElementType();
4839 for (unsigned i = 0; i < NumElts; ++i) {
4840 bool IsUndef = Values[i] < 0 && IsMask;
4841 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
4842 DAG.getConstant(Values[i], dl, EltVT);
4843 Ops.push_back(OpNode);
4844 if (Split)
4845 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
4846 DAG.getConstant(0, dl, EltVT));
4847 }
4848 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4849 if (Split)
4850 ConstsNode = DAG.getBitcast(VT, ConstsNode);
4851 return ConstsNode;
4852}
4853
4854static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
4855 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4856  assert(Bits.size() == Undefs.getBitWidth() &&
4857         "Unequal constant and undef arrays");
4858 SmallVector<SDValue, 32> Ops;
4859 bool Split = false;
4860
4861 MVT ConstVecVT = VT;
4862 unsigned NumElts = VT.getVectorNumElements();
4863 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4864 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4865 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4866 Split = true;
4867 }
4868
4869 MVT EltVT = ConstVecVT.getVectorElementType();
4870 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
4871 if (Undefs[i]) {
4872 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
4873 continue;
4874 }
4875 const APInt &V = Bits[i];
4876    assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
4877 if (Split) {
4878 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
4879 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
4880 } else if (EltVT == MVT::f32) {
4881 APFloat FV(APFloat::IEEEsingle(), V);
4882 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4883 } else if (EltVT == MVT::f64) {
4884 APFloat FV(APFloat::IEEEdouble(), V);
4885 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4886 } else {
4887 Ops.push_back(DAG.getConstant(V, dl, EltVT));
4888 }
4889 }
4890
4891 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4892 return DAG.getBitcast(VT, ConstsNode);
4893}
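
// A minimal sketch of the 32-bit-mode splitting done in the constant-vector
// builders above: when i64 is not legal, each 64-bit element is emitted as two
// consecutive 32-bit elements, low half first, and the resulting <2N x i32>
// vector is bitcast back to the requested type. Plain C++ on a raw value,
// assuming the little-endian lane order used by the code.
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> splitTo32BitLanes(uint64_t V) {
  uint32_t Lo = static_cast<uint32_t>(V);        // V.trunc(32)
  uint32_t Hi = static_cast<uint32_t>(V >> 32);  // V.lshr(32).trunc(32)
  return {Lo, Hi};
}

static void splitExample() {
  std::pair<uint32_t, uint32_t> Lanes = splitTo32BitLanes(0x0000000300000001ULL);
  assert(Lanes.first == 0x00000001 && Lanes.second == 0x00000003);
}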
4894
4895/// Returns a vector of specified type with all zero elements.
4896static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
4897 SelectionDAG &DAG, const SDLoc &dl) {
4898  assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
4899          VT.getVectorElementType() == MVT::i1) &&
4900         "Unexpected vector type");
4901
4902 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
4903 // type. This ensures they get CSE'd. But if the integer type is not
4904 // available, use a floating-point +0.0 instead.
4905 SDValue Vec;
4906 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
4907 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
4908 } else if (VT.getVectorElementType() == MVT::i1) {
4909    assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
4910           "Unexpected vector type");
4911    assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&
4912           "Unexpected vector type");
4913 Vec = DAG.getConstant(0, dl, VT);
4914 } else {
4915 unsigned Num32BitElts = VT.getSizeInBits() / 32;
4916 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
4917 }
4918 return DAG.getBitcast(VT, Vec);
4919}
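// Illustrative note (editor's sketch, not part of X86ISelLowering.cpp): on an
// SSE2 target a request for a zero v8i16 takes the final branch above, so
// Num32BitElts == 128 / 32 == 4 and the constant is materialized as a v4i32 of
// zeroes that is then bitcast back to v8i16. Because every 128-bit zero goes
// through the same v4i32 node, the DAG CSEs them into one constant. A
// standalone sketch of just the width arithmetic (hypothetical helper):
#include <cassert>
static unsigned num32BitElts(unsigned VectorSizeInBits) {
  assert(VectorSizeInBits % 32 == 0 && "expected a multiple of 32 bits");
  return VectorSizeInBits / 32; // e.g. 128 -> 4, 256 -> 8, 512 -> 16
}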
4920
4921static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
4922 const SDLoc &dl, unsigned vectorWidth) {
4923 EVT VT = Vec.getValueType();
4924 EVT ElVT = VT.getVectorElementType();
4925 unsigned Factor = VT.getSizeInBits()/vectorWidth;
4926 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
4927 VT.getVectorNumElements()/Factor);
4928
4929 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
4930 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
4931 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
4932
4933 // This is the index of the first element of the vectorWidth-bit chunk
4934 // we want. Since ElemsPerChunk is a power of 2, we just need to clear the low bits.
4935 IdxVal &= ~(ElemsPerChunk - 1);
4936
4937 // If the input is a buildvector just emit a smaller one.
4938 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
4939 return DAG.getBuildVector(ResultVT, dl,
4940 Vec->ops().slice(IdxVal, ElemsPerChunk));
4941
4942 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
4943 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
4944}
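// Illustrative sketch (editor's addition, not from the original file): the
// "IdxVal &= ~(ElemsPerChunk - 1)" step above rounds an element index down to
// the start of its vectorWidth-bit chunk; for a power-of-two chunk size this
// is the same as (IdxVal / ElemsPerChunk) * ElemsPerChunk.
#include <cassert>
static unsigned roundDownToChunkStart(unsigned IdxVal, unsigned ElemsPerChunk) {
  assert((ElemsPerChunk & (ElemsPerChunk - 1)) == 0 && "power of 2 expected");
  return IdxVal & ~(ElemsPerChunk - 1);
}
// e.g. extracting 128 bits from a v8i32: ElemsPerChunk == 4, so element index
// 6 rounds down to 4, i.e. the upper 128-bit half of the source vector.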
4945
4946/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
4947/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
4948/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
4949/// instructions or a simple subregister reference. Idx is an index in the
4950/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
4951/// lowering EXTRACT_VECTOR_ELT operations easier.
4952static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
4953 SelectionDAG &DAG, const SDLoc &dl) {
4954 assert((Vec.getValueType().is256BitVector() ||
4955         Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
4956 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
4957}
4958
4959/// Generate a DAG to grab 256-bits from a 512-bit vector.
4960static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
4961 SelectionDAG &DAG, const SDLoc &dl) {
4962 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
4963 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
4964}
4965
4966static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
4967 SelectionDAG &DAG, const SDLoc &dl,
4968 unsigned vectorWidth) {
4969 assert((vectorWidth == 128 || vectorWidth == 256) &&
4970        "Unsupported vector width");
4971 // Inserting an UNDEF subvector leaves Result unchanged.
4972 if (Vec.isUndef())
4973 return Result;
4974 EVT VT = Vec.getValueType();
4975 EVT ElVT = VT.getVectorElementType();
4976 EVT ResultVT = Result.getValueType();
4977
4978 // Insert the relevant vectorWidth bits.
4979 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
4980 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
4981
4982 // This is the index of the first element of the vectorWidth-bit chunk
4983 // we want. Since ElemsPerChunk is a power of 2, we just need to clear the low bits.
4984 IdxVal &= ~(ElemsPerChunk - 1);
4985
4986 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
4987 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
4988}
4989
4990/// Generate a DAG to put 128-bits into a vector > 128 bits. This
4991/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
4992/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
4993/// simple superregister reference. Idx is an index in the 128 bits
4994/// we want. It need not be aligned to a 128-bit boundary. That makes
4995/// lowering INSERT_VECTOR_ELT operations easier.
4996static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
4997 SelectionDAG &DAG, const SDLoc &dl) {
4998 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
4999 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5000}
5001
5002static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5003 SelectionDAG &DAG, const SDLoc &dl) {
5004 assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
5005 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
5006}
5007
5008// Return true if the instruction zeroes the unused upper part of the
5009 // destination and accepts a mask.
5010static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
5011 switch (Opcode) {
5012 default:
5013 return false;
5014 case X86ISD::TESTM:
5015 case X86ISD::TESTNM:
5016 case X86ISD::PCMPEQM:
5017 case X86ISD::PCMPGTM:
5018 case X86ISD::CMPM:
5019 case X86ISD::CMPMU:
5020 return true;
5021 }
5022}
5023
5024/// Insert i1-subvector to i1-vector.
5025static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5026 const X86Subtarget &Subtarget) {
5027
5028 SDLoc dl(Op);
5029 SDValue Vec = Op.getOperand(0);
5030 SDValue SubVec = Op.getOperand(1);
5031 SDValue Idx = Op.getOperand(2);
5032
5033 if (!isa<ConstantSDNode>(Idx))
5034 return SDValue();
5035
5036 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5037 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5038 return Op;
5039
5040 MVT OpVT = Op.getSimpleValueType();
5041 MVT SubVecVT = SubVec.getSimpleValueType();
5042 unsigned NumElems = OpVT.getVectorNumElements();
5043 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5044
5045 assert(IdxVal + SubVecNumElems <= NumElems &&
5046        IdxVal % SubVecVT.getSizeInBits() == 0 &&
5047        "Unexpected index value in INSERT_SUBVECTOR");
5048
5049 // There are 3 possible cases:
5050 // 1. Subvector should be inserted in the lower part (IdxVal == 0)
5051 // 2. Subvector should be inserted in the upper part
5052 // (IdxVal + SubVecNumElems == NumElems)
5053 // 3. Subvector should be inserted in the middle (for example v2i1
5054 // to v16i1, index 2)
5055
5056 // If this node widens - by concatenating zeroes - the type of the result
5057 // of a node whose instruction already zeroes all upper (irrelevant) bits of
5058 // its output register, mark this node as legal so it can be replaced with
5059 // the v8i1 version of the previous instruction during instruction selection.
5060 // For example, the VPCMPEQDZ128rr instruction stores its v4i1 result in a
5061 // k-register while zeroing the remaining upper 60 bits of that register. If
5062 // the result of such an instruction is inserted into an all-zeros vector,
5063 // the INSERT_SUBVECTOR can safely be removed (in instruction selection), as
5064 // the compare instruction has already zeroed the rest of the register.
5065 if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 &&
5066 (isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) ||
5067 (SubVec.getOpcode() == ISD::AND &&
5068 (isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) ||
5069 isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode())))))
5070 return Op;
5071
5072 // extend to natively supported kshift
5073 MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5074 MVT WideOpVT = OpVT;
5075 if (OpVT.getSizeInBits() < MinVT.getStoreSizeInBits())
5076 WideOpVT = MinVT;
5077
5078 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5079 SDValue Undef = DAG.getUNDEF(WideOpVT);
5080 SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5081 Undef, SubVec, ZeroIdx);
5082
5083 // Extract the sub-vector if required.
5084 auto ExtractSubVec = [&](SDValue V) {
5085 return (WideOpVT == OpVT) ? V : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
5086 OpVT, V, ZeroIdx);
5087 };
5088
5089 if (Vec.isUndef()) {
5090 if (IdxVal != 0) {
5091 SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
5092 WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5093 ShiftBits);
5094 }
5095 return ExtractSubVec(WideSubVec);
5096 }
5097
5098 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5099 NumElems = WideOpVT.getVectorNumElements();
5100 unsigned ShiftLeft = NumElems - SubVecNumElems;
5101 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5102 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5103 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5104 Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5105 DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec;
5106 return ExtractSubVec(Vec);
5107 }
5108
5109 if (IdxVal == 0) {
5110 // Zero lower bits of the Vec
5111 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5112 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5113 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5114 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5115 // Merge them together; SubVec should be zero-extended.
5116 WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5117 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5118 SubVec, ZeroIdx);
5119 Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
5120 return ExtractSubVec(Vec);
5121 }
5122
5123 // Simple case when we put subvector in the upper part
5124 if (IdxVal + SubVecNumElems == NumElems) {
5125 // Zero upper bits of the Vec
5126 WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5127 DAG.getConstant(IdxVal, dl, MVT::i8));
5128 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5129 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5130 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5131 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5132 Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
5133 return ExtractSubVec(Vec);
5134 }
5135 // Subvector should be inserted in the middle - use shuffle
5136 WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
5137 SubVec, ZeroIdx);
5138 SmallVector<int, 64> Mask;
5139 for (unsigned i = 0; i < NumElems; ++i)
5140 Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
5141 i : i + NumElems);
5142 return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
5143}
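// Illustrative sketch (editor's addition, not part of X86ISelLowering.cpp; the
// helper name below is hypothetical): the all-zeros path above performs the
// insertion purely with mask shifts. Modelling a k-register as a plain 16-bit
// integer (one bit per i1 element), inserting a 2-element sub-mask at index 2
// uses ShiftLeft = 16 - 2 = 14 and ShiftRight = 16 - 2 - 2 = 12; the left
// shift pushes any stale bits above the sub-mask out of the register and the
// right shift lands the sub-mask at the requested index with zeroes elsewhere.
#include <cstdint>
static uint16_t insertIntoZeroMask(uint16_t SubMask, unsigned SubNumElems,
                                   unsigned IdxVal, unsigned NumElems = 16) {
  unsigned ShiftLeft = NumElems - SubNumElems;           // KSHIFTL amount
  unsigned ShiftRight = NumElems - SubNumElems - IdxVal; // KSHIFTR amount
  return (uint16_t)((uint16_t)(SubMask << ShiftLeft) >> ShiftRight);
}
// e.g. insertIntoZeroMask(0b11, 2, 2) == 0b1100.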
5144
5145/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
5146/// instructions. This is used because creating CONCAT_VECTOR nodes of
5147/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
5148/// large BUILD_VECTORS.
5149static SDValue concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
5150 unsigned NumElems, SelectionDAG &DAG,
5151 const SDLoc &dl) {
5152 SDValue V = insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5153 return insert128BitVector(V, V2, NumElems / 2, DAG, dl);
5154}
5155
5156static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
5157 unsigned NumElems, SelectionDAG &DAG,
5158 const SDLoc &dl) {
5159 SDValue V = insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5160 return insert256BitVector(V, V2, NumElems / 2, DAG, dl);
5161}
5162
5163/// Returns a vector of specified type with all bits set.
5164/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5165/// Then bitcast to their original type, ensuring they get CSE'd.
5166static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5167 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
5168        "Expected a 128/256/512-bit vector type");
5169
5170 APInt Ones = APInt::getAllOnesValue(32);
5171 unsigned NumElts = VT.getSizeInBits() / 32;
5172 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5173 return DAG.getBitcast(VT, Vec);
5174}
5175
5176static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
5177 SelectionDAG &DAG) {
5178 EVT InVT = In.getValueType();
5179 assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode");
5180
5181 if (VT.is128BitVector() && InVT.is128BitVector())
5182 return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
5183 : DAG.getZeroExtendVectorInReg(In, DL, VT);
5184
5185 // For 256-bit vectors, we only need the lower (128-bit) input half.
5186 // For 512-bit vectors, we only need the lower input half or quarter.
5187 if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
5188 int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5189 In = extractSubVector(In, 0, DAG, DL,
5190 std::max(128, (int)VT.getSizeInBits() / Scale));
5191 }
5192
5193 return DAG.getNode(Opc, DL, VT, In);
5194}
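// Illustrative note (editor's sketch, hypothetical helper): the code above
// only feeds the extend the low part of its input. With Scale =
// result-element-bits / input-element-bits, the number of input bits actually
// consumed is VT.getSizeInBits() / Scale, clamped to at least 128 so a legal
// subvector can be extracted:
#include <algorithm>
static int extendInputBitsNeeded(int ResultBits, int ResultEltBits,
                                 int InputEltBits) {
  int Scale = ResultEltBits / InputEltBits;
  return std::max(128, ResultBits / Scale);
}
// e.g. a 512-bit zero-extend from i8 to i32 has Scale == 4 and only reads
// extendInputBitsNeeded(512, 32, 8) == 128 bits, the "lower quarter" case
// mentioned in the comment above.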
5195
5196/// Returns a vector_shuffle node for an unpackl operation.
5197static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5198 SDValue V1, SDValue V2) {
5199 SmallVector<int, 8> Mask;
5200 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5201 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5202}
5203
5204/// Returns a vector_shuffle node for an unpackh operation.
5205static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5206 SDValue V1, SDValue V2) {
5207 SmallVector<int, 8> Mask;
5208 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5209 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5210}
5211
5212/// Return a vector_shuffle of the specified vector and a zero or undef vector.
5213/// This produces a shuffle where the low element of V2 is swizzled into the
5214/// zero/undef vector, landing at element Idx.
5215/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
5216static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5217 bool IsZero,
5218 const X86Subtarget &Subtarget,
5219 SelectionDAG &DAG) {
5220 MVT VT = V2.getSimpleValueType();
5221 SDValue V1 = IsZero
5222 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5223 int NumElems = VT.getVectorNumElements();
5224 SmallVector<int, 16> MaskVec(NumElems);
5225 for (int i = 0; i != NumElems; ++i)
5226 // If this is the insertion idx, put the low elt of V2 here.
5227 MaskVec[i] = (i == Idx) ? NumElems : i;
5228 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5229}
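// Worked example (editor's addition): for a 4-element type the loop above
// produces MaskVec == {4, 1, 2, 3} when Idx == 0 and {0, 1, 2, 4} when
// Idx == 3; indices >= NumElems select from V2, so only position Idx takes
// V2's low element while every other lane keeps the zero/undef vector V1.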
5230
5231static SDValue peekThroughBitcasts(SDValue V) {
5232 while (V.getNode() && V.getOpcode() == ISD::BITCAST)
5233 V = V.getOperand(0);
5234 return V;
5235}
5236
5237static SDValue peekThroughOneUseBitcasts(SDValue V) {
5238 while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
5239 V.getOperand(0).hasOneUse())
5240 V = V.getOperand(0);
5241 return V;
5242}
5243
5244static const Constant *getTargetConstantFromNode(SDValue Op) {
5245 Op = peekThroughBitcasts(Op);
5246
5247 auto *Load = dyn_cast<LoadSDNode>(Op);
5248 if (!Load)
5249 return nullptr;
5250
5251 SDValue Ptr = Load->getBasePtr();
5252 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5253 Ptr->getOpcode() == X86ISD::WrapperRIP)
5254 Ptr = Ptr->getOperand(0);
5255
5256 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5257 if (!CNode || CNode->isMachineConstantPoolEntry())
5258 return nullptr;
5259
5260 return dyn_cast<Constant>(CNode->getConstVal());
5261}
5262
5263// Extract raw constant bits from constant pools.
5264static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5265 APInt &UndefElts,
5266 SmallVectorImpl<APInt> &EltBits,
5267 bool AllowWholeUndefs = true,
5268 bool AllowPartialUndefs = true) {
5269 assert(EltBits.empty() && "Expected an empty EltBits vector");
5270
5271 Op = peekThroughBitcasts(Op);
5272
5273 EVT VT = Op.getValueType();
5274 unsigned SizeInBits = VT.getSizeInBits();
5275 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
5276 unsigned NumElts = SizeInBits / EltSizeInBits;
5277
5278 // Bitcast a source array of element bits to the target size.
5279 auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
5280 unsigned NumSrcElts = UndefSrcElts.getBitWidth();
5281 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
5282   assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
5283          "Constant bit sizes don't match");
5284
5285 // Don't split if we don't allow undef bits.
5286 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5287 if (UndefSrcElts.getBoolValue() && !AllowUndefs)
5288 return false;
5289
5290 // If we're already the right size, don't bother bitcasting.
5291 if (NumSrcElts == NumElts) {
5292 UndefElts = UndefSrcElts;
5293 EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
5294 return true;
5295 }
5296
5297 // Extract all the undef/constant element data and pack into single bitsets.
5298 APInt UndefBits(SizeInBits, 0);
5299 APInt MaskBits(SizeInBits, 0);
5300
5301 for (unsigned i = 0; i != NumSrcElts; ++i) {
5302 unsigned BitOffset = i * SrcEltSizeInBits;
5303 if (UndefSrcElts[i])
5304 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5305 MaskBits.insertBits(SrcEltBits[i], BitOffset);
5306 }
5307
5308 // Split the undef/constant single bitset data into the target elements.
5309 UndefElts = APInt(NumElts, 0);
5310 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5311
5312 for (unsigned i = 0; i != NumElts; ++i) {
5313 unsigned BitOffset = i * EltSizeInBits;
5314 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5315
5316 // Only treat an element as UNDEF if all bits are UNDEF.
5317 if (UndefEltBits.isAllOnesValue()) {
5318 if (!AllowWholeUndefs)
5319 return false;
5320 UndefElts.setBit(i);
5321 continue;
5322 }
5323
5324 // If only some bits are UNDEF then treat them as zero (or bail if not
5325 // supported).
5326 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5327 return false;
5328
5329 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5330 EltBits[i] = Bits.getZExtValue();
5331 }
5332 return true;
5333 };
5334
5335 // Collect constant bits and insert into mask/undef bit masks.
5336 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5337 unsigned UndefBitIndex) {
5338 if (!Cst)
5339 return false;
5340 if (isa<UndefValue>(Cst)) {
5341 Undefs.setBit(UndefBitIndex);
5342 return true;
5343 }
5344 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5345 Mask = CInt->getValue();
5346 return true;
5347 }
5348 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5349 Mask = CFP->getValueAPF().bitcastToAPInt();
5350 return true;
5351 }
5352 return false;
5353 };
5354
5355 // Handle UNDEFs.
5356 if (Op.isUndef()) {
5357 APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
5358 SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
5359 return CastBitData(UndefSrcElts, SrcEltBits);
5360 }
5361
5362 // Extract scalar constant bits.
5363 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
5364 APInt UndefSrcElts = APInt::getNullValue(1);
5365 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
5366 return CastBitData(UndefSrcElts, SrcEltBits);
5367 }
5368
5369 // Extract constant bits from build vector.
5370 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5371 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5372 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5373
5374 APInt UndefSrcElts(NumSrcElts, 0);
5375 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5376 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5377 const SDValue &Src = Op.getOperand(i);
5378 if (Src.isUndef()) {
5379 UndefSrcElts.setBit(i);
5380 continue;
5381 }
5382 auto *Cst = cast<ConstantSDNode>(Src);
5383 SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5384 }
5385 return CastBitData(UndefSrcElts, SrcEltBits);
5386 }
5387
5388 // Extract constant bits from constant pool vector.
5389 if (auto *Cst = getTargetConstantFromNode(Op)) {
5390 Type *CstTy = Cst->getType();
5391 if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
5392 return false;
5393
5394 unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
5395 unsigned NumSrcElts = CstTy->getVectorNumElements();
5396
5397 APInt UndefSrcElts(NumSrcElts, 0);
5398 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5399 for (unsigned i = 0; i != NumSrcElts; ++i)
5400 if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
5401 UndefSrcElts, i))
5402 return false;
5403
5404 return CastBitData(UndefSrcElts, SrcEltBits);
5405 }
5406
5407 // Extract constant bits from a broadcasted constant pool scalar.
5408 if (Op.getOpcode() == X86ISD::VBROADCAST &&
5409 EltSizeInBits <= VT.getScalarSizeInBits()) {
5410 if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5411 unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
5412 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5413
5414 APInt UndefSrcElts(NumSrcElts, 0);
5415 SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
5416 if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
5417 if (UndefSrcElts[0])
5418 UndefSrcElts.setBits(0, NumSrcElts);
5419 SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
5420 return CastBitData(UndefSrcElts, SrcEltBits);
5421 }
5422 }
5423 }
5424
5425 // Extract a rematerialized scalar constant insertion.
5426 if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
5427 Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
5428 isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
5429 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5430 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5431
5432 APInt UndefSrcElts(NumSrcElts, 0);
5433 SmallVector<APInt, 64> SrcEltBits;
5434 auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
5435 SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
5436 SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
5437 return CastBitData(UndefSrcElts, SrcEltBits);
5438 }
5439
5440 return false;
5441}
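// Illustrative sketch (editor's addition, hypothetical helper): CastBitData
// above concatenates the source elements into one wide bit string (source
// element i lands at bit offset i * SrcEltSizeInBits) and then re-slices that
// string at the requested element size. The same repacking on plain integers,
// splitting a 128-bit constant from 4 x i32 into 2 x i64:
#include <array>
#include <cstdint>
static std::array<uint64_t, 2> repack4x32To2x64(const std::array<uint32_t, 4> &Src) {
  std::array<uint64_t, 2> Dst{};
  for (unsigned i = 0; i != 4; ++i)
    Dst[i / 2] |= (uint64_t)Src[i] << ((i % 2) * 32); // element i at bit i*32
  return Dst;
}
// e.g. {1, 2, 3, 4} repacks to {0x0000000200000001, 0x0000000400000003}.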
5442
5443static bool getTargetShuffleMaskIndices(SDValue MaskNode,
5444 unsigned MaskEltSizeInBits,
5445 SmallVectorImpl<uint64_t> &RawMask) {
5446 APInt UndefElts;
5447 SmallVector<APInt, 64> EltBits;
5448
5449 // Extract the raw target constant bits.
5450 // FIXME: We currently don't support UNDEF bits or mask entries.
5451 if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
5452 EltBits, /* AllowWholeUndefs */ false,
5453 /* AllowPartialUndefs */ false))
5454 return false;
5455
5456 // Insert the extracted elements into the mask.
5457 for (APInt Elt : EltBits)
5458 RawMask.push_back(Elt.getZExtValue());
5459
5460 return true;
5461}
5462
5463/// Create a shuffle mask that matches the PACKSS/PACKUS truncation.
5464/// Note: This ignores saturation, so inputs must be checked first.
5465static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
5466 bool Unary) {
5467 assert(Mask.empty() && "Expected an empty shuffle mask vector");
5468 unsigned NumElts = VT.getVectorNumElements();
5469 unsigned NumLanes = VT.getSizeInBits() / 128;
5470 unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits();
5471 unsigned Offset = Unary ? 0 : NumElts;
5472
5473 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
5474 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5475 Mask.push_back(Elt + (Lane * NumEltsPerLane));
5476 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5477 Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset);
5478 }
5479}
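// Illustrative sketch (editor's addition, hypothetical helper): for a 128-bit
// v16i8 result and a two-input (non-unary) pack, the mask built above is
//   { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
// i.e. the even-numbered (low) bytes of the first input followed by those of
// the second input, which is the PACKSSWB/PACKUSWB truncation pattern once
// saturation is ignored. The single-lane case reduces to:
#include <vector>
static std::vector<int> packMask128(unsigned EltSizeInBits, bool Unary) {
  unsigned NumEltsPerLane = 128 / EltSizeInBits;
  unsigned Offset = Unary ? 0 : NumEltsPerLane;
  std::vector<int> Mask;
  for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
    Mask.push_back(Elt);           // low halves taken from input 0
  for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
    Mask.push_back(Elt + Offset);  // low halves taken from input 1
  return Mask;
}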
5480
5481/// Calculates the shuffle mask corresponding to the target-specific opcode.
5482/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
5483/// operands in \p Ops, and returns true.
5484/// Sets \p IsUnary to true if only one source is used. Note that this will set
5485/// IsUnary for shuffles which use a single input multiple times, and in those
5486/// cases it will adjust the mask to only have indices within that single input.
5487/// It is an error to call this with non-empty Mask/Ops vectors.
5488static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
5489 SmallVectorImpl<SDValue> &Ops,
5490 SmallVectorImpl<int> &Mask, bool &IsUnary) {
5491 unsigned NumElems = VT.getVectorNumElements();
5492 SDValue ImmN;
5493
5494 assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
5495 assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector");
5496
5497 IsUnary = false;
5498 bool IsFakeUnary = false;
5499 switch(N->getOpcode()) {
5500 case X86ISD::BLENDI:
5501 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5502 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5503 ImmN = N->getOperand(N->getNumOperands()-1);
5504 DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5505 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5506 break;
5507 case X86ISD::SHUFP:
5508 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5509 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5510 ImmN = N->getOperand(N->getNumOperands()-1);
5511 DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5512 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5513 break;
5514 case X86ISD::INSERTPS:
5515 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5516 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5517 ImmN = N->getOperand(N->getNumOperands()-1);
5518 DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5519 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5520 break;
5521 case X86ISD::EXTRQI:
5522 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5523 if (isa<ConstantSDNode>(N->getOperand(1)) &&
5524 isa<ConstantSDNode>(N->getOperand(2))) {
5525 int BitLen = N->getConstantOperandVal(1);
5526 int BitIdx = N->getConstantOperandVal(2);
5527 DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask);
5528 IsUnary = true;
5529 }
5530 break;
5531 case X86ISD::INSERTQI:
5532 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5533 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5534 if (isa<ConstantSDNode>(N->getOperand(2)) &&
5535 isa<ConstantSDNode>(N->getOperand(3))) {
5536 int BitLen = N->getConstantOperandVal(2);
5537 int BitIdx = N->getConstantOperandVal(3);
5538 DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask);
5539 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5540 }
5541 break;
5542 case X86ISD::UNPCKH:
5543 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5544 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5545 DecodeUNPCKHMask(VT, Mask);
5546 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5547 break;
5548 case X86ISD::UNPCKL:
5549 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5550 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5551 DecodeUNPCKLMask(VT, Mask);
5552 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5553 break;
5554 case X86ISD::MOVHLPS:
5555 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5556 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5557 DecodeMOVHLPSMask(NumElems, Mask);
5558 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5559 break;
5560 case X86ISD::MOVLHPS:
5561 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5562 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5563 DecodeMOVLHPSMask(NumElems, Mask);
5564 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5565 break;
5566 case X86ISD::PALIGNR:
5567 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
5568 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5569 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5570 ImmN = N->getOperand(N->getNumOperands()-1);
5571 DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5572 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5573 Ops.push_back(N->getOperand(1));
5574 Ops.push_back(N->getOperand(0));
5575 break;
5576 case X86ISD::VSHLDQ:
5577 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
5578 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5579 ImmN = N->getOperand(N->getNumOperands() - 1);
5580 DecodePSLLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5581 IsUnary = true;
5582 break;
5583 case X86ISD::VSRLDQ:
5584 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
5585 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5586 ImmN = N->getOperand(N->getNumOperands() - 1);
5587 DecodePSRLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5588 IsUnary = true;
5589 break;
5590 case X86ISD::PSHUFD:
5591 case X86ISD::VPERMILPI:
5592 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5593 ImmN = N->getOperand(N->getNumOperands()-1);
5594 DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5595 IsUnary = true;
5596 break;
5597 case X86ISD::PSHUFHW:
5598 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5599 ImmN = N->getOperand(N->getNumOperands()-1);
5600 DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5601 IsUnary = true;
5602 break;
5603 case X86ISD::PSHUFLW:
5604 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5605 ImmN = N->getOperand(N->getNumOperands()-1);
5606 DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5607 IsUnary = true;
5608 break;
5609 case X86ISD::VZEXT_MOVL:
5610 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5611 DecodeZeroMoveLowMask(VT, Mask);
5612 IsUnary = true;
5613 break;
5614 case X86ISD::VBROADCAST: {
5615 SDValue N0 = N->getOperand(0);
5616 // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
5617 // add the pre-extracted value to the Ops vector.
5618 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5619 N0.getOperand(0).getValueType() == VT &&
5620 N0.getConstantOperandVal(1) == 0)
5621 Ops.push_back(N0.getOperand(0));
5622
5623 // We only decode broadcasts of same-sized vectors, unless the broadcast
5624 // came from an extract from the original width. If we found one, we
5625 // pushed it into the Ops vector above.
5626 if (N0.getValueType() == VT || !Ops.empty()) {
5627 DecodeVectorBroadcast(VT, Mask);
5628 IsUnary = true;
5629 break;
5630 }
5631 return false;
5632 }
5633 case X86ISD::VPERMILPV: {
5634 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5635 IsUnary = true;
5636 SDValue MaskNode = N->getOperand(1);
5637 unsigned MaskEltSize = VT.getScalarSizeInBits();
5638 SmallVector<uint64_t, 32> RawMask;
5639 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5640 DecodeVPERMILPMask(VT, RawMask, Mask);
5641 break;
5642 }
5643 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5644 DecodeVPERMILPMask(C, MaskEltSize, Mask);
5645 break;
5646 }
5647 return false;
5648 }
5649 case X86ISD::PSHUFB: {
5650 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
5651 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5652 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5653 IsUnary = true;
5654 SDValue MaskNode = N->getOperand(1);
5655 SmallVector<uint64_t, 32> RawMask;
5656 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5657 DecodePSHUFBMask(RawMask, Mask);
5658 break;
5659 }
5660 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5661 DecodePSHUFBMask(C, Mask);
5662 break;
5663 }
5664 return false;
5665 }
5666 case X86ISD::VPERMI:
5667 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5668 ImmN = N->getOperand(N->getNumOperands()-1);
5669 DecodeVPERMMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5670 IsUnary = true;
5671 break;
5672 case X86ISD::MOVSS:
5673 case X86ISD::MOVSD:
5674 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5675 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5676 DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
5677 break;
5678 case X86ISD::VPERM2X128:
5679 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5680 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5681 ImmN = N->getOperand(N->getNumOperands()-1);
5682 DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5683 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5684 break;
5685 case X86ISD::MOVSLDUP:
5686 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5687 DecodeMOVSLDUPMask(VT, Mask);
5688 IsUnary = true;
5689 break;
5690 case X86ISD::MOVSHDUP:
5691 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5692 DecodeMOVSHDUPMask(VT, Mask);
5693 IsUnary = true;
5694 break;
5695 case X86ISD::MOVDDUP:
5696 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5697 DecodeMOVDDUPMask(VT, Mask);
5698 IsUnary = true;
5699 break;
5700 case X86ISD::MOVLPD:
5701 case X86ISD::MOVLPS:
5702 // Not yet implemented
5703 return false;
5704 case X86ISD::VPERMIL2: {
5705 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5706 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5707 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5708 unsigned MaskEltSize = VT.getScalarSizeInBits();
5709 SDValue MaskNode = N->getOperand(2);
5710 SDValue CtrlNode = N->getOperand(3);
5711 if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
5712 unsigned CtrlImm = CtrlOp->getZExtValue();
5713 SmallVector<uint64_t, 32> RawMask;
5714 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5715 DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask);
5716 break;
5717 }
5718 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5719 DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
5720 break;
5721 }
5722 }
5723 return false;
5724 }
5725 case X86ISD::VPPERM: {
5726 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5727 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5728 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5729 SDValue MaskNode = N->getOperand(2);
5730 SmallVector<uint64_t, 32> RawMask;
5731 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5732 DecodeVPPERMMask(RawMask, Mask);
5733 break;
5734 }
5735 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5736 DecodeVPPERMMask(C, Mask);
5737 break;
5738 }
5739 return false;
5740 }
5741 case X86ISD::VPERMV: {
5742 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5743 IsUnary = true;
5744 // Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
5745 Ops.push_back(N->getOperand(1));
5746 SDValue MaskNode = N->getOperand(0);
5747 SmallVector<uint64_t, 32> RawMask;
5748 unsigned MaskEltSize = VT.getScalarSizeInBits();
5749 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5750 DecodeVPERMVMask(RawMask, Mask);
5751 break;
5752 }
5753 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5754 DecodeVPERMVMask(C, MaskEltSize, Mask);
5755 break;
5756 }
5757 return false;
5758 }
5759 case X86ISD::VPERMV3: {
5760    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
5761    assert(N->getOperand(2).getValueType() == VT && "Unexpected value type");
5762 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2);
5763 // Unlike most shuffle nodes, VPERMV3's mask operand is the middle one.
5764 Ops.push_back(N->getOperand(0));
5765 Ops.push_back(N->getOperand(2));
5766 SDValue MaskNode = N->getOperand(1);
5767 unsigned MaskEltSize = VT.getScalarSizeInBits();
5768 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5769 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5770 break;
5771 }
5772 return false;
5773 }
5774 case X86ISD::VPERMIV3: {
5775    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5776    assert(N->getOperand(2).getValueType() == VT && "Unexpected value type");
5777 IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
5778 // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
5779 Ops.push_back(N->getOperand(1));
5780 Ops.push_back(N->getOperand(2));
5781 SDValue MaskNode = N->getOperand(0);
5782 unsigned MaskEltSize = VT.getScalarSizeInBits();
5783 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5784 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5785 break;
5786 }
5787 return false;
5788 }
5789  default: llvm_unreachable("unknown target shuffle node");
5790 }
5791
5792 // Empty mask indicates the decode failed.
5793 if (Mask.empty())
5794 return false;
5795
5796 // Check if we're getting a shuffle mask with zero'd elements.
5797 if (!AllowSentinelZero)
5798 if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
5799 return false;
5800
5801 // If we have a fake unary shuffle, the shuffle mask is spread across two
5802 // inputs that are actually the same node. Re-map the mask to always point
5803 // into the first input.
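      // As a concrete illustration of the re-mapping below: with a 4-element
      // mask, a fake-unary mask of <4, 1, 6, 3> becomes the unary mask
      // <0, 1, 2, 3>, with every index now referring to the first input.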
5804 if (IsFakeUnary)
5805 for (int &M : Mask)
5806 if (M >= (int)Mask.size())
5807 M -= Mask.size();
5808
5809 // If we didn't already add operands in the opcode-specific code, default to
5810 // adding 1 or 2 operands starting at 0.
5811 if (Ops.empty()) {
5812 Ops.push_back(N->getOperand(0));
5813 if (!IsUnary || IsFakeUnary)
5814 Ops.push_back(N->getOperand(1));
5815 }
5816
5817 return true;
5818}
5819
5820/// Check a target shuffle mask's inputs to see if we can set any values to
5821/// SM_SentinelZero - this is for elements that are known to be zero
5822/// (not just zeroable) from their inputs.
5823/// Returns true if the target shuffle mask was decoded.
5824static bool setTargetShuffleZeroElements(SDValue N,
5825 SmallVectorImpl<int> &Mask,
5826 SmallVectorImpl<SDValue> &Ops) {
5827 bool IsUnary;
5828 if (!isTargetShuffle(N.getOpcode()))
5829 return false;
5830
5831 MVT VT = N.getSimpleValueType();
5832 if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
5833 return false;
5834
5835 SDValue V1 = Ops[0];
5836 SDValue V2 = IsUnary ? V1 : Ops[1];
5837
5838 V1 = peekThroughBitcasts(V1);
5839 V2 = peekThroughBitcasts(V2);
5840
5841  assert((VT.getSizeInBits() % Mask.size()) == 0 &&
5842         "Illegal split of shuffle value type");
5843 unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
5844
5845 // Extract known constant input data.
5846 APInt UndefSrcElts[2];
5847 SmallVector<APInt, 32> SrcEltBits[2];
5848 bool IsSrcConstant[2] = {
5849 getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
5850 SrcEltBits[0], true, false),
5851 getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
5852 SrcEltBits[1], true, false)};
5853
5854 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
5855 int M = Mask[i];
5856
5857 // Already decoded as SM_SentinelZero / SM_SentinelUndef.
5858 if (M < 0)
5859 continue;
5860
5861 // Determine shuffle input and normalize the mask.
5862 unsigned SrcIdx = M / Size;
5863 SDValue V = M < Size ? V1 : V2;
5864 M %= Size;
5865
5866 // We are referencing an UNDEF input.
5867 if (V.isUndef()) {
5868 Mask[i] = SM_SentinelUndef;
5869 continue;
5870 }
5871
5872 // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF.
5873 // TODO: We currently only set UNDEF for integer types - floats use the same
5874 // registers as vectors and many of the scalar folded loads rely on the
5875 // SCALAR_TO_VECTOR pattern.
5876 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5877 (Size % V.getValueType().getVectorNumElements()) == 0) {
5878 int Scale = Size / V.getValueType().getVectorNumElements();
5879 int Idx = M / Scale;
5880 if (Idx != 0 && !VT.isFloatingPoint())
5881 Mask[i] = SM_SentinelUndef;
5882 else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
5883 Mask[i] = SM_SentinelZero;
5884 continue;
5885 }
5886
5887 // Attempt to extract from the source's constant bits.
5888 if (IsSrcConstant[SrcIdx]) {
5889 if (UndefSrcElts[SrcIdx][M])
5890 Mask[i] = SM_SentinelUndef;
5891 else if (SrcEltBits[SrcIdx][M] == 0)
5892 Mask[i] = SM_SentinelZero;
5893 }
5894 }
5895
5896  assert(VT.getVectorNumElements() == Mask.size() &&
5897         "Different mask size from vector size!");
5898 return true;
5899}
5900
5901// Attempt to decode ops that could be represented as a shuffle mask.
5902// The decoded shuffle mask may contain a different number of elements than
5903// the destination value type.
5904static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
5905 SmallVectorImpl<SDValue> &Ops,
5906 SelectionDAG &DAG) {
5907 Mask.clear();
5908 Ops.clear();
5909
5910 MVT VT = N.getSimpleValueType();
5911 unsigned NumElts = VT.getVectorNumElements();
5912 unsigned NumSizeInBits = VT.getSizeInBits();
5913 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
5914  assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 &&
5915         "Expected byte aligned value types");
5916
5917 unsigned Opcode = N.getOpcode();
5918 switch (Opcode) {
5919 case ISD::AND:
5920 case X86ISD::ANDNP: {
5921 // Attempt to decode as a per-byte mask.
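    // A rough illustrative sketch (Z = SM_SentinelZero): for
    // (and v4i32 X, <-1, 0, -1, 0>) the per-byte decode below yields the v16i8
    // mask <0,1,2,3, Z,Z,Z,Z, 8,9,10,11, Z,Z,Z,Z>, keeping the bytes of X where
    // the constant bytes are 0xFF and zeroing them where the bytes are 0x00.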
5922 APInt UndefElts;
5923 SmallVector<APInt, 32> EltBits;
5924 SDValue N0 = N.getOperand(0);
5925 SDValue N1 = N.getOperand(1);
5926 bool IsAndN = (X86ISD::ANDNP == Opcode);
5927 uint64_t ZeroMask = IsAndN ? 255 : 0;
5928 if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
5929 return false;
5930 for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
5931 if (UndefElts[i]) {
5932 Mask.push_back(SM_SentinelUndef);
5933 continue;
5934 }
5935 uint64_t ByteBits = EltBits[i].getZExtValue();
5936 if (ByteBits != 0 && ByteBits != 255)
5937 return false;
5938 Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
5939 }
5940 Ops.push_back(IsAndN ? N1 : N0);
5941 return true;
5942 }
5943 case ISD::SCALAR_TO_VECTOR: {
5944 // Match against a scalar_to_vector of an extract from a vector,
5945 // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
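    // Illustrative example (U = SM_SentinelUndef): a
    // (v4i32 scalar_to_vector (extract_vector_elt v4i32 X, 2)) is decoded
    // below as the mask <2, U, U, U> with X as the only input; the
    // PEXTRW/PEXTRB forms additionally mark the implicitly zero-extended high
    // bits with SM_SentinelZero entries.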
5946 SDValue N0 = N.getOperand(0);
5947 SDValue SrcExtract;
5948
5949 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5950 N0.getOperand(0).getValueType() == VT) ||
5951 (N0.getOpcode() == X86ISD::PEXTRW &&
5952 N0.getOperand(0).getValueType() == MVT::v8i16) ||
5953 (N0.getOpcode() == X86ISD::PEXTRB &&
5954 N0.getOperand(0).getValueType() == MVT::v16i8)) {
5955 SrcExtract = N0;
5956 }
5957
5958 if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
5959 return false;
5960
5961 SDValue SrcVec = SrcExtract.getOperand(0);
5962 EVT SrcVT = SrcVec.getValueType();
5963 unsigned NumSrcElts = SrcVT.getVectorNumElements();
5964 unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
5965
5966 unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
5967 if (NumSrcElts <= SrcIdx)
5968 return false;
5969
5970 Ops.push_back(SrcVec);
5971 Mask.push_back(SrcIdx);
5972 Mask.append(NumZeros, SM_SentinelZero);
5973 Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
5974 return true;
5975 }
5976 case X86ISD::PINSRB:
5977 case X86ISD::PINSRW: {
5978 SDValue InVec = N.getOperand(0);
5979 SDValue InScl = N.getOperand(1);
5980 uint64_t InIdx = N.getConstantOperandVal(2);
5981    assert(InIdx < NumElts && "Illegal insertion index");
5982
5983 // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern.
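    // e.g. (v8i16 PINSRW V, 0, 2) is decoded below as the shuffle mask
    // <0, 1, Z, 3, 4, 5, 6, 7> with V as the single input.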
5984 if (X86::isZeroNode(InScl)) {
5985 Ops.push_back(InVec);
5986 for (unsigned i = 0; i != NumElts; ++i)
5987 Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
5988 return true;
5989 }
5990
5991 // Attempt to recognise a PINSR*(PEXTR*) shuffle pattern.
5992 // TODO: Expand this to support INSERT_VECTOR_ELT/etc.
5993 unsigned ExOp =
5994 (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW);
5995 if (InScl.getOpcode() != ExOp)
5996 return false;
5997
5998 SDValue ExVec = InScl.getOperand(0);
5999 uint64_t ExIdx = InScl.getConstantOperandVal(1);
6000    assert(ExIdx < NumElts && "Illegal extraction index");
6001 Ops.push_back(InVec);
6002 Ops.push_back(ExVec);
6003 for (unsigned i = 0; i != NumElts; ++i)
6004 Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
6005 return true;
6006 }
6007 case X86ISD::PACKSS:
6008 case X86ISD::PACKUS: {
6009 SDValue N0 = N.getOperand(0);
6010 SDValue N1 = N.getOperand(1);
6011    assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
6012           N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
6013           "Unexpected input value type");
6014
6015 // If we know input saturation won't happen we can treat this
6016 // as a truncation shuffle.
6017 if (Opcode == X86ISD::PACKSS) {
6018 if ((!N0.isUndef() && DAG.ComputeNumSignBits(N0) <= NumBitsPerElt) ||
6019 (!N1.isUndef() && DAG.ComputeNumSignBits(N1) <= NumBitsPerElt))
6020 return false;
6021 } else {
6022 APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
6023 if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask)) ||
6024 (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask)))
6025 return false;
6026 }
6027
6028 bool IsUnary = (N0 == N1);
6029
6030 Ops.push_back(N0);
6031 if (!IsUnary)
6032 Ops.push_back(N1);
6033
6034 createPackShuffleMask(VT, Mask, IsUnary);
6035 return true;
6036 }
6037 case X86ISD::VSHLI:
6038 case X86ISD::VSRLI: {
6039 uint64_t ShiftVal = N.getConstantOperandVal(1);
6040 // Out of range bit shifts are guaranteed to be zero.
6041 if (NumBitsPerElt <= ShiftVal) {
6042 Mask.append(NumElts, SM_SentinelZero);
6043 return true;
6044 }
6045
6046 // We can only decode 'whole byte' bit shifts as shuffles.
6047 if ((ShiftVal % 8) != 0)
6048 break;
6049
6050 uint64_t ByteShift = ShiftVal / 8;
6051 unsigned NumBytes = NumSizeInBits / 8;
6052 unsigned NumBytesPerElt = NumBitsPerElt / 8;
6053 Ops.push_back(N.getOperand(0));
6054
6055 // Clear mask to all zeros and insert the shifted byte indices.
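    // Worked example of the mask built below: a VSHLI of v2i64 by 8 bits
    // (ByteShift == 1) produces the byte mask
    // <Z,0,1,2,3,4,5,6, Z,8,9,10,11,12,13,14>, shifting each 64-bit lane left
    // by one byte and filling the vacated byte with zero.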
6056 Mask.append(NumBytes, SM_SentinelZero);
6057
6058 if (X86ISD::VSHLI == Opcode) {
6059 for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
6060 for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
6061 Mask[i + j] = i + j - ByteShift;
6062 } else {
6063 for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
6064 for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
6065 Mask[i + j - ByteShift] = i + j;
6066 }
6067 return true;
6068 }
6069 case ISD::ZERO_EXTEND_VECTOR_INREG:
6070 case X86ISD::VZEXT: {
6071 // TODO - add support for VPMOVZX with smaller input vector types.
6072 SDValue Src = N.getOperand(0);
6073 MVT SrcVT = Src.getSimpleValueType();
6074 if (NumSizeInBits != SrcVT.getSizeInBits())
6075 break;
6076 DecodeZeroExtendMask(SrcVT.getScalarType(), VT, Mask);
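    // e.g. for a zero extension from v8i16 to v4i32 the call above produces
    // the source mask <0, Z, 1, Z, 2, Z, 3, Z>.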
6077 Ops.push_back(Src);
6078 return true;
6079 }
6080 }
6081
6082 return false;
6083}
6084
6085/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly.
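/// e.g. for Inputs = {A, B} with a 4-wide mask of <4, 5, 6, 7> (only B is
/// referenced), A is dropped and the mask is rebased to <0, 1, 2, 3>.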
6086static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
6087 SmallVectorImpl<int> &Mask) {
6088 int MaskWidth = Mask.size();
6089 SmallVector<SDValue, 16> UsedInputs;
6090 for (int i = 0, e = Inputs.size(); i < e; ++i) {
6091 int lo = UsedInputs.size() * MaskWidth;
6092 int hi = lo + MaskWidth;
6093
6094 // Strip UNDEF input usage.
6095 if (Inputs[i].isUndef())
6096 for (int &M : Mask)
6097 if ((lo <= M) && (M < hi))
6098 M = SM_SentinelUndef;
6099
6100 // Check for unused inputs.
6101 if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
6102 UsedInputs.push_back(Inputs[i]);
6103 continue;
6104 }
6105 for (int &M : Mask)
6106 if (lo <= M)
6107 M -= MaskWidth;
6108 }
6109 Inputs = UsedInputs;
6110}
6111
6112/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
6113/// and set the SM_SentinelUndef and SM_SentinelZero values. Then checks the
6114/// remaining input indices in case we now have a unary shuffle, and adjusts
6115/// the inputs accordingly.
6116/// Returns true if the target shuffle mask was decoded.
6117static bool resolveTargetShuffleInputs(SDValue Op,
6118 SmallVectorImpl<SDValue> &Inputs,
6119 SmallVectorImpl<int> &Mask,
6120 SelectionDAG &DAG) {
6121 if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
6122 if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
6123 return false;
6124
6125 resolveTargetShuffleInputsAndMask(Inputs, Mask);
6126 return true;
6127}
6128
6129/// Returns the scalar element that will make up the ith
6130/// element of the result of the vector shuffle.
6131static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
6132 unsigned Depth) {
6133 if (Depth == 6)
6134 return SDValue(); // Limit search depth.
6135
6136 SDValue V = SDValue(N, 0);
6137 EVT VT = V.getValueType();
6138 unsigned Opcode = V.getOpcode();
6139
6140 // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
6141 if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
6142 int Elt = SV->getMaskElt(Index);
6143
6144 if (Elt < 0)
6145 return DAG.getUNDEF(VT.getVectorElementType());
6146
6147 unsigned NumElems = VT.getVectorNumElements();
6148 SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
6149 : SV->getOperand(1);
6150 return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
6151 }
6152
6153 // Recurse into target specific vector shuffles to find scalars.
6154 if (isTargetShuffle(Opcode)) {
6155 MVT ShufVT = V.getSimpleValueType();
6156 MVT ShufSVT = ShufVT.getVectorElementType();
6157 int NumElems = (int)ShufVT.getVectorNumElements();
6158 SmallVector<int, 16> ShuffleMask;
6159 SmallVector<SDValue, 16> ShuffleOps;
6160 bool IsUnary;
6161
6162 if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary))
6163 return SDValue();
6164
6165 int Elt = ShuffleMask[Index];
6166 if (Elt == SM_SentinelZero)
6167 return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(N), ShufSVT)
6168 : DAG.getConstantFP(+0.0, SDLoc(N), ShufSVT);
6169 if (Elt == SM_SentinelUndef)
6170 return DAG.getUNDEF(ShufSVT);
6171
6172    assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range");
6173 SDValue NewV = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
6174 return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
6175 Depth+1);
6176 }
6177
6178 // Actual nodes that may contain scalar elements
6179 if (Opcode == ISD::BITCAST) {
6180 V = V.getOperand(0);
6181 EVT SrcVT = V.getValueType();
6182 unsigned NumElems = VT.getVectorNumElements();
6183
6184 if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
6185 return SDValue();
6186 }
6187
6188 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
6189 return (Index == 0) ? V.getOperand(0)
6190 : DAG.getUNDEF(VT.getVectorElementType());
6191
6192 if (V.getOpcode() == ISD::BUILD_VECTOR)
6193 return V.getOperand(Index);
6194
6195 return SDValue();
6196}
6197
6198// Use PINSRB/PINSRW/PINSRD to create a build vector.
6199static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
6200 unsigned NumNonZero, unsigned NumZero,
6201 SelectionDAG &DAG,
6202 const X86Subtarget &Subtarget) {
6203 MVT VT = Op.getSimpleValueType();
6204 unsigned NumElts = VT.getVectorNumElements();
6205  assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) ||
6206          ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) &&
6207         "Illegal vector insertion");
6208
6209 SDLoc dl(Op);
6210 SDValue V;
6211 bool First = true;
6212
6213 for (unsigned i = 0; i < NumElts; ++i) {
6214 bool IsNonZero = (NonZeros & (1 << i)) != 0;
6215 if (!IsNonZero)
6216 continue;
6217
6218    // If the build vector contains zeros, or our first insertion is not at
6219    // index 0, then insert into a zero vector to break any register
6220    // dependency; otherwise use SCALAR_TO_VECTOR/VZEXT_MOVL.
6221 if (First) {
6222 First = false;
6223 if (NumZero || 0 != i)
6224 V = getZeroVector(VT, Subtarget, DAG, dl);
6225 else {
6226        assert(0 == i && "Expected insertion into zero-index");
6227 V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
6228 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6229 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6230 V = DAG.getBitcast(VT, V);
6231 continue;
6232 }
6233 }
6234 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V, Op.getOperand(i),
6235 DAG.getIntPtrConstant(i, dl));
6236 }
6237
6238 return V;
6239}
6240
6241/// Custom lower build_vector of v16i8.
6242static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
6243 unsigned NumNonZero, unsigned NumZero,
6244 SelectionDAG &DAG,
6245 const X86Subtarget &Subtarget) {
6246 if (NumNonZero > 8 && !Subtarget.hasSSE41())
6247 return SDValue();
6248
6249 // SSE4.1 - use PINSRB to insert each byte directly.
6250 if (Subtarget.hasSSE41())
6251 return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
6252 Subtarget);
6253
6254 SDLoc dl(Op);
6255 SDValue V;
6256 bool First = true;
6257
6258 // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
6259 for (unsigned i = 0; i < 16; ++i) {
6260 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
6261 if (ThisIsNonZero && First) {
6262 if (NumZero)
6263 V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
6264 else
6265 V = DAG.getUNDEF(MVT::v8i16);
6266 First = false;
6267 }
6268
6269 if ((i & 1) != 0) {
6270 // FIXME: Investigate extending to i32 instead of just i16.
6271      // FIXME: Investigate combining the first 4 bytes as an i32 instead.
6272 SDValue ThisElt, LastElt;
6273 bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
6274 if (LastIsNonZero) {
6275 LastElt =
6276 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
6277 }
6278 if (ThisIsNonZero) {
6279 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
6280 ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
6281 DAG.getConstant(8, dl, MVT::i8));
6282 if (LastIsNonZero)
6283 ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
6284 } else
6285 ThisElt = LastElt;
6286
6287 if (ThisElt) {
6288 if (1 == i) {
6289 V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
6290 : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
6291 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6292 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6293 V = DAG.getBitcast(MVT::v8i16, V);
6294 } else {
6295 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
6296 DAG.getIntPtrConstant(i / 2, dl));
6297 }
6298 }
6299 }
6300 }
6301
6302 return DAG.getBitcast(MVT::v16i8, V);
6303}
6304
6305/// Custom lower build_vector of v8i16.
6306static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
6307 unsigned NumNonZero, unsigned NumZero,
6308 SelectionDAG &DAG,
6309 const X86Subtarget &Subtarget) {
6310 if (NumNonZero > 4 && !Subtarget.hasSSE41())
6311 return SDValue();
6312
6313 // Use PINSRW to insert each byte directly.
6314 return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
6315 Subtarget);
6316}
6317
6318/// Custom lower build_vector of v4i32 or v4f32.
6319static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
6320 const X86Subtarget &Subtarget) {
6321 // Find all zeroable elements.
6322 std::bitset<4> Zeroable;
6323 for (int i=0; i < 4; ++i) {
6324 SDValue Elt = Op->getOperand(i);
6325 Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
6326 }
6327  assert(Zeroable.size() - Zeroable.count() > 1 &&
6328         "We expect at least two non-zero elements!");
6329
6330 // We only know how to deal with build_vector nodes where elements are either
6331 // zeroable or extract_vector_elt with constant index.
6332 SDValue FirstNonZero;
6333 unsigned FirstNonZeroIdx;
6334 for (unsigned i=0; i < 4; ++i) {
6335 if (Zeroable[i])
6336 continue;
6337 SDValue Elt = Op->getOperand(i);
6338 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6339 !isa<ConstantSDNode>(Elt.getOperand(1)))
6340 return SDValue();
6341 // Make sure that this node is extracting from a 128-bit vector.
6342 MVT VT = Elt.getOperand(0).getSimpleValueType();
6343 if (!VT.is128BitVector())
6344 return SDValue();
6345 if (!FirstNonZero.getNode()) {
6346 FirstNonZero = Elt;
6347 FirstNonZeroIdx = i;
6348 }
6349 }
6350
6351  assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
6352 SDValue V1 = FirstNonZero.getOperand(0);
6353 MVT VT = V1.getSimpleValueType();
6354
6355 // See if this build_vector can be lowered as a blend with zero.
6356 SDValue Elt;
6357 unsigned EltMaskIdx, EltIdx;
6358 int Mask[4];
6359 for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
6360 if (Zeroable[EltIdx]) {
6361 // The zero vector will be on the right hand side.
6362 Mask[EltIdx] = EltIdx+4;
6363 continue;
6364 }
6365
6366 Elt = Op->getOperand(EltIdx);
6367 // By construction, Elt is a EXTRACT_VECTOR_ELT with constant index.
6368 EltMaskIdx = Elt.getConstantOperandVal(1);
6369 if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
6370 break;
6371 Mask[EltIdx] = EltIdx;
6372 }
6373
6374 if (EltIdx == 4) {
6375 // Let the shuffle legalizer deal with blend operations.
6376 SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
6377 if (V1.getSimpleValueType() != VT)
6378 V1 = DAG.getBitcast(VT, V1);
6379 return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask);
6380 }
6381
6382 // See if we can lower this build_vector to a INSERTPS.
6383 if (!Subtarget.hasSSE41())
6384 return SDValue();
6385
6386 SDValue V2 = Elt.getOperand(0);
6387 if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
6388 V1 = SDValue();
6389
6390 bool CanFold = true;
6391 for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
6392 if (Zeroable[i])
6393 continue;
6394
6395 SDValue Current = Op->getOperand(i);
6396 SDValue SrcVector = Current->getOperand(0);
6397 if (!V1.getNode())
6398 V1 = SrcVector;
6399 CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i);
6400 }
6401
6402 if (!CanFold)
6403 return SDValue();
6404
6405  assert(V1.getNode() && "Expected at least two non-zero elements!");
6406 if (V1.getSimpleValueType() != MVT::v4f32)
6407 V1 = DAG.getBitcast(MVT::v4f32, V1);
6408 if (V2.getSimpleValueType() != MVT::v4f32)
6409 V2 = DAG.getBitcast(MVT::v4f32, V2);
6410
6411 // Ok, we can emit an INSERTPS instruction.
6412 unsigned ZMask = Zeroable.to_ulong();
6413
6414 unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
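  // Worked encoding example: copying source element 2 of V2 into destination
  // element 1 while zeroing elements 0 and 3 gives ZMask == 0b1001 and an
  // immediate of (2 << 6) | (1 << 4) | 0b1001 == 0x99, matching the INSERTPS
  // imm8 layout (bits 7:6 source lane, bits 5:4 destination lane, bits 3:0
  // zero mask).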
6415  assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
6416 SDLoc DL(Op);
6417 SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
6418 DAG.getIntPtrConstant(InsertPSMask, DL));
6419 return DAG.getBitcast(VT, Result);
6420}
6421
6422/// Return a vector logical shift node.
6423static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
6424 SelectionDAG &DAG, const TargetLowering &TLI,
6425 const SDLoc &dl) {
6426  assert(VT.is128BitVector() && "Unknown type for VShift");
6427 MVT ShVT = MVT::v16i8;
6428 unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
6429 SrcOp = DAG.getBitcast(ShVT, SrcOp);
6430 MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
6431  assert(NumBits % 8 == 0 && "Only support byte sized shifts");
6432 SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
6433 return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
6434}
6435
6436static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
6437 SelectionDAG &DAG) {
6438
6439 // Check if the scalar load can be widened into a vector load. And if
6440 // the address is "base + cst" see if the cst can be "absorbed" into
6441 // the shuffle mask.
6442 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
6443 SDValue Ptr = LD->getBasePtr();
6444 if (!ISD::isNormalLoad(LD) || LD->isVolatile())
6445 return SDValue();
6446 EVT PVT = LD->getValueType(0);
6447 if (PVT != MVT::i32 && PVT != MVT::f32)
6448 return SDValue();
6449
6450 int FI = -1;
6451 int64_t Offset = 0;
6452 if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
6453 FI = FINode->getIndex();
6454 Offset = 0;
6455 } else if (DAG.isBaseWithConstantOffset(Ptr) &&
6456 isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
6457 FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
6458 Offset = Ptr.getConstantOperandVal(1);
6459 Ptr = Ptr.getOperand(0);
6460 } else {
6461 return SDValue();
6462 }
6463
6464 // FIXME: 256-bit vector instructions don't require a strict alignment,
6465 // improve this code to support it better.
6466 unsigned RequiredAlign = VT.getSizeInBits()/8;
6467 SDValue Chain = LD->getChain();
6468 // Make sure the stack object alignment is at least 16 or 32.
6469 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6470 if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
6471 if (MFI.isFixedObjectIndex(FI)) {
6472        // Can't change the alignment. FIXME: It's possible to compute
6473        // the exact stack offset and reference FI + adjusted offset instead.
6474        // That's the way to implement it if someone *really* cares about this.
6475 return SDValue();
6476 } else {
6477 MFI.setObjectAlignment(FI, RequiredAlign);
6478 }
6479 }
6480
6481    // (Offset % 16 or 32) must be a multiple of 4. The address is then
6482    // Ptr + (Offset & ~15).
6483 if (Offset < 0)
6484 return SDValue();
6485 if ((Offset % RequiredAlign) & 3)
6486 return SDValue();
6487 int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
6488 if (StartOffset) {
6489 SDLoc DL(Ptr);
6490 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6491 DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
6492 }
6493
6494 int EltNo = (Offset - StartOffset) >> 2;
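    // e.g. with RequiredAlign == 16 and Offset == 20, StartOffset == 16 and
    // EltNo == 1, so the widened load is splatted from its second element.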
6495 unsigned NumElems = VT.getVectorNumElements();
6496
6497 EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
6498 SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
6499 LD->getPointerInfo().getWithOffset(StartOffset));
6500
6501 SmallVector<int, 8> Mask(NumElems, EltNo);
6502
6503 return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask);
6504 }
6505
6506 return SDValue();
6507}
6508
6509/// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
6510/// elements can be replaced by a single large load which has the same value as
6511/// a build_vector or insert_subvector whose loaded operands are 'Elts'.
6512///
6513/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
6514static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
6515 const SDLoc &DL, SelectionDAG &DAG,
6516 const X86Subtarget &Subtarget,
6517 bool isAfterLegalize) {
6518 unsigned NumElems = Elts.size();
6519
6520 int LastLoadedElt = -1;
6521 SmallBitVector LoadMask(NumElems, false);
6522 SmallBitVector ZeroMask(NumElems, false);
[6] Calling constructor for 'SmallBitVector'
[9] Returning from constructor for 'SmallBitVector'
6523 SmallBitVector UndefMask(NumElems, false);
6524
6525 // For each element in the initializer, see if we've found a load, zero or an
6526 // undef.
6527 for (unsigned i = 0; i < NumElems; ++i) {
[10] Loop condition is true. Entering loop body
[19] Loop condition is true. Entering loop body
[28] Loop condition is true. Entering loop body
6528 SDValue Elt = peekThroughBitcasts(Elts[i]);
6529 if (!Elt.getNode())
[11] Assuming the condition is false
[12] Taking false branch
[20] Assuming the condition is false
[21] Taking false branch
[29] Assuming the condition is false
[30] Taking false branch
6530 return SDValue();
6531
6532 if (Elt.isUndef())
[13] Taking false branch
[22] Taking false branch
[31] Taking false branch
6533 UndefMask[i] = true;
6534 else if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode()))
[14] Assuming the condition is false
[15] Assuming the condition is false
[16] Taking false branch
[23] Assuming the condition is false
[24] Assuming the condition is false
[25] Taking false branch
6535 ZeroMask[i] = true;
[32] Calling 'reference::operator='
6536 else if (ISD::isNON_EXTLoad(Elt.getNode())) {
[17] Taking true branch
[26] Taking true branch
6537 LoadMask[i] = true;
6538 LastLoadedElt = i;
6539 // Each loaded element must be the correct fractional portion of the
6540 // requested vector load.
6541 if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
[18] Taking false branch
[27] Taking false branch
6542 return SDValue();
6543 } else
6544 return SDValue();
6545 }
6546  assert((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
6547         "Incomplete element masks");
6548
6549 // Handle Special Cases - all undef or undef/zero.
6550 if (UndefMask.count() == NumElems)
6551 return DAG.getUNDEF(VT);
6552
6553 // FIXME: Should we return this as a BUILD_VECTOR instead?
6554 if ((ZeroMask | UndefMask).count() == NumElems)
6555 return VT.isInteger() ? DAG.getConstant(0, DL, VT)
6556 : DAG.getConstantFP(0.0, DL, VT);
6557
6558 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6559 int FirstLoadedElt = LoadMask.find_first();
6560 SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
6561 LoadSDNode *LDBase = cast<LoadSDNode>(EltBase);
6562 EVT LDBaseVT = EltBase.getValueType();
6563
6564  // Consecutive loads can contain UNDEF but not ZERO elements.
6565  // Consecutive loads with both UNDEF and ZERO elements require an
6566  // additional shuffle stage to clear the ZERO elements.
6567 bool IsConsecutiveLoad = true;
6568 bool IsConsecutiveLoadWithZeros = true;
6569 for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
6570 if (LoadMask[i]) {
6571 SDValue Elt = peekThroughBitcasts(Elts[i]);
6572 LoadSDNode *LD = cast<LoadSDNode>(Elt);
6573 if (!DAG.areNonVolatileConsecutiveLoads(
6574 LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
6575 i - FirstLoadedElt)) {
6576 IsConsecutiveLoad = false;
6577 IsConsecutiveLoadWithZeros = false;
6578 break;
6579 }
6580 } else if (ZeroMask[i]) {
6581 IsConsecutiveLoad = false;
6582 }
6583 }
6584
6585 SmallVector<LoadSDNode *, 8> Loads;
6586 for (int i = FirstLoadedElt; i <= LastLoadedElt; ++i)
6587 if (LoadMask[i])
6588 Loads.push_back(cast<LoadSDNode>(peekThroughBitcasts(Elts[i])));
6589
6590 auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
6591 auto MMOFlags = LDBase->getMemOperand()->getFlags();
6592    assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&
6593           "Cannot merge volatile loads.");
6594 SDValue NewLd =
6595 DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
6596 LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
6597 for (auto *LD : Loads)
6598 DAG.makeEquivalentMemoryOrdering(LD, NewLd);
6599 return NewLd;
6600 };
6601
6602 // LOAD - all consecutive load/undefs (must start/end with a load).
6603 // If we have found an entire vector of loads and undefs, then return a large
6604 // load of the entire vector width starting at the base pointer.
6605 // If the vector contains zeros, then attempt to shuffle those elements.
6606 if (FirstLoadedElt == 0 && LastLoadedElt == (int)(NumElems - 1) &&
6607 (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
6608    assert(LDBase && "Did not find base load for merging consecutive loads");
6609 EVT EltVT = LDBase->getValueType(0);
6610 // Ensure that the input vector size for the merged loads matches the
6611 // cumulative size of the input elements.
6612 if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
6613 return SDValue();
6614
6615 if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
6616 return SDValue();
6617
6618 // Don't create 256-bit non-temporal aligned loads without AVX2 as these
6619 // will lower to regular temporal loads and use the cache.
6620 if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 &&
6621 VT.is256BitVector() && !Subtarget.hasInt256())
6622 return SDValue();
6623
6624 if (IsConsecutiveLoad)
6625 return CreateLoad(VT, LDBase);
6626
6627 // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
6628 // vector and a zero vector to clear out the zero elements.
6629 if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
6630 SmallVector<int, 4> ClearMask(NumElems, -1);
6631 for (unsigned i = 0; i < NumElems; ++i) {
6632 if (ZeroMask[i])
6633 ClearMask[i] = i + NumElems;
6634 else if (LoadMask[i])
6635 ClearMask[i] = i;
6636 }
6637 SDValue V = CreateLoad(VT, LDBase);
6638 SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
6639 : DAG.getConstantFP(0.0, DL, VT);
6640 return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
6641 }
6642 }
6643
6644 int LoadSize =
6645 (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
6646
6647 // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
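  // e.g. a v4i32 build_vector of <load a[0], load a[1], zero, zero> becomes a
  // VZEXT_LOAD of the 64 bits at 'a' (type-legality permitting), which is then
  // bitcast back to the requested vector type.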
6648 if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
6649 (LoadSize == 32 || LoadSize == 64) &&
6650 ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
6651 MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
6652 : MVT::getIntegerVT(LoadSize);
6653 MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
6654 if (TLI.isTypeLegal(VecVT)) {
6655 SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
6656 SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
6657 SDValue ResNode =
6658 DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT,
6659 LDBase->getPointerInfo(),
6660 LDBase->getAlignment(),
6661 false/*isVolatile*/, true/*ReadMem*/,
6662 false/*WriteMem*/);
6663 for (auto *LD : Loads)
6664 DAG.makeEquivalentMemoryOrdering(LD, ResNode);
6665 return DAG.getBitcast(VT, ResNode);
6666 }
6667 }
6668
6669 return SDValue();
6670}
6671
6672static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
6673 unsigned SplatBitSize, LLVMContext &C) {
6674 unsigned ScalarSize = VT.getScalarSizeInBits();
6675 unsigned NumElm = SplatBitSize / ScalarSize;
6676
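  // e.g. splitting a 64-bit splat value of 0x0000000200000001 at 32-bit scalar
  // granularity produces the constant vector <i32 1, i32 2>.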
6677 SmallVector<Constant *, 32> ConstantVec;
6678 for (unsigned i = 0; i < NumElm; i++) {
6679 APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
6680 Constant *Const;
6681 if (VT.isFloatingPoint()) {
6682 if (ScalarSize == 32) {
6683 Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
6684 } else {
6685        assert(ScalarSize == 64 && "Unsupported floating point scalar size");
6686 Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
6687 }
6688 } else
6689 Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
6690 ConstantVec.push_back(Const);
6691 }
6692 return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
6693}
6694
6695static bool isUseOfShuffle(SDNode *N) {
6696 for (auto *U : N->uses()) {
6697 if (isTargetShuffle(U->getOpcode()))
6698 return true;
6699 if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
6700 return isUseOfShuffle(U);
6701 }
6702 return false;
6703}
6704
6705// Check if the current node of the build vector is a zero-extended vector.
6706// If so, return the value that is being extended.
6707// For example: (0,0,0,a,0,0,0,a,0,0,0,a,0,0,0,a) returns a.
6708// NumElt - returns the number of zero-extended identical values.
6709// EltType - returns the type of the value including the zero extend.
6710static SDValue isSplatZeroExtended(const BuildVectorSDNode *Op,
6711 unsigned &NumElt, MVT &EltType) {
6712 SDValue ExtValue = Op->getOperand(0);
6713 unsigned NumElts = Op->getNumOperands();
6714 unsigned Delta = NumElts;
6715
6716 for (unsigned i = 1; i < NumElts; i++) {
6717 if (Op->getOperand(i) == ExtValue) {
6718 Delta = i;
6719 break;
6720 }
6721 if (!(Op->getOperand(i).isUndef() || isNullConstant(Op->getOperand(i))))
6722 return SDValue();
6723 }
6724 if (!isPowerOf2_32(Delta) || Delta == 1)
6725 return SDValue();
6726
6727 for (unsigned i = Delta; i < NumElts; i++) {
6728 if (i % Delta == 0) {
6729 if (Op->getOperand(i) != ExtValue)
6730 return SDValue();
6731 } else if (!(isNullConstant(Op->getOperand(i)) ||
6732 Op->getOperand(i).isUndef()))
6733 return SDValue();
6734 }
6735 unsigned EltSize = Op->getSimpleValueType(0).getScalarSizeInBits();
6736 unsigned ExtVTSize = EltSize * Delta;
6737 EltType = MVT::getIntegerVT(ExtVTSize);
6738 NumElt = NumElts / Delta;
6739 return ExtValue;
6740}
6741
6742/// Attempt to use the vbroadcast instruction to generate a splat value
6743/// from a splat BUILD_VECTOR which uses:
6744/// a. A single scalar load, or a constant.
6745/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
6746///
6747/// The VBROADCAST node is returned when a pattern is found,
6748/// or SDValue() otherwise.
6749static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
6750 const X86Subtarget &Subtarget,
6751 SelectionDAG &DAG) {
6752 // VBROADCAST requires AVX.
6753 // TODO: Splats could be generated for non-AVX CPUs using SSE
6754 // instructions, but there's less potential gain for only 128-bit vectors.
6755 if (!Subtarget.hasAVX())
6756 return SDValue();
6757
6758 MVT VT = BVOp->getSimpleValueType(0);
6759 SDLoc dl(BVOp);
6760
6761  assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
6762         "Unsupported vector type for broadcast.");
6763
6764 BitVector UndefElements;
6765 SDValue Ld = BVOp->getSplatValue(&UndefElements);
6766
6767 // Attempt to use VBROADCASTM
6768  // From this pattern:
6769 // a. t0 = (zext_i64 (bitcast_i8 v2i1 X))
6770 // b. t1 = (build_vector t0 t0)
6771 //
6772 // Create (VBROADCASTM v2i1 X)
6773 if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) {
6774 MVT EltType = VT.getScalarType();
6775 unsigned NumElts = VT.getVectorNumElements();
6776 SDValue BOperand;
6777 SDValue ZeroExtended = isSplatZeroExtended(BVOp, NumElts, EltType);
6778 if ((ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) ||
6779 (Ld && Ld.getOpcode() == ISD::ZERO_EXTEND &&
6780 Ld.getOperand(0).getOpcode() == ISD::BITCAST)) {
6781 if (ZeroExtended)
6782 BOperand = ZeroExtended.getOperand(0);
6783 else
6784 BOperand = Ld.getOperand(0).getOperand(0);
6785 if (BOperand.getValueType().isVector() &&
6786 BOperand.getSimpleValueType().getVectorElementType() == MVT::i1) {
6787 if ((EltType == MVT::i64 && (VT.getVectorElementType() == MVT::i8 ||
6788 NumElts == 8)) || // for broadcastmb2q
6789 (EltType == MVT::i32 && (VT.getVectorElementType() == MVT::i16 ||
6790 NumElts == 16))) { // for broadcastmw2d
6791 SDValue Brdcst =
6792 DAG.getNode(X86ISD::VBROADCASTM, dl,
6793 MVT::getVectorVT(EltType, NumElts), BOperand);
6794 return DAG.getBitcast(VT, Brdcst);
6795 }
6796 }
6797 }
6798 }
6799
6800 // We need a splat of a single value to use broadcast, and it doesn't
6801 // make any sense if the value is only in one element of the vector.
6802 if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
6803 APInt SplatValue, Undef;
6804 unsigned SplatBitSize;
6805 bool HasUndef;
6806 // Check if this is a repeated constant pattern suitable for broadcasting.
6807 if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
6808 SplatBitSize > VT.getScalarSizeInBits() &&
6809 SplatBitSize < VT.getSizeInBits()) {
6810 // Avoid replacing with broadcast when it's a use of a shuffle
6811 // instruction to preserve the present custom lowering of shuffles.
6812 if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
6813 return SDValue();
6814 // replace BUILD_VECTOR with broadcast of the repeated constants.
6815 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6816 LLVMContext *Ctx = DAG.getContext();
6817 MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
6818 if (Subtarget.hasAVX()) {
6819 if (SplatBitSize <= 64 && Subtarget.hasAVX2() &&
6820 !(SplatBitSize == 64 && Subtarget.is32Bit())) {
6821 // Splatted value can fit in one INTEGER constant in constant pool.
6822 // Load the constant and broadcast it.
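          // Illustrative example: a v8i32 build_vector repeating <1, 2> has
          // SplatBitSize == 64, so the 64-bit constant 0x0000000200000001 is
          // loaded once and broadcast as v4i64 before being bitcast back to
          // v8i32.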
6823 MVT CVT = MVT::getIntegerVT(SplatBitSize);
6824 Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
6825 Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
6826 SDValue CP = DAG.getConstantPool(C, PVT);
6827 unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
6828
6829 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6830 Ld = DAG.getLoad(
6831 CVT, dl, DAG.getEntryNode(), CP,
6832 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6833 Alignment);
6834 SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
6835 MVT::getVectorVT(CVT, Repeat), Ld);
6836 return DAG.getBitcast(VT, Brdcst);
6837 } else if (SplatBitSize == 32 || SplatBitSize == 64) {
6838 // Splatted value can fit in one FLOAT constant in constant pool.
6839 // Load the constant and broadcast it.
6840          // AVX has support for 32- and 64-bit broadcasts of floats only.
6841          // There is no 64-bit integer broadcast on a 32-bit subtarget.
6842 MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
6843 // Lower the splat via APFloat directly, to avoid any conversion.
6844 Constant *C =
6845 SplatBitSize == 32
6846 ? ConstantFP::get(*Ctx,
6847 APFloat(APFloat::IEEEsingle(), SplatValue))
6848 : ConstantFP::get(*Ctx,
6849 APFloat(APFloat::IEEEdouble(), SplatValue));
6850 SDValue CP = DAG.getConstantPool(C, PVT);
6851 unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
6852
6853 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6854 Ld = DAG.getLoad(
6855 CVT, dl, DAG.getEntryNode(), CP,
6856 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6857 Alignment);
6858 SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
6859 MVT::getVectorVT(CVT, Repeat), Ld);
6860 return DAG.getBitcast(VT, Brdcst);
6861 } else if (SplatBitSize > 64) {
6862 // Load the vector of constants and broadcast it.
6863 MVT CVT = VT.getScalarType();
6864 Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
6865 *Ctx);
6866 SDValue VCP = DAG.getConstantPool(VecC, PVT);
6867 unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
6868 unsigned Alignment = cast<ConstantPoolSDNode>(VCP)->getAlignment();
6869 Ld = DAG.getLoad(
6870 MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
6871 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6872 Alignment);
6873 SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
6874 return DAG.getBitcast(VT, Brdcst);
6875 }
6876 }
6877 }
6878 return SDValue();
6879 }
6880
6881 bool ConstSplatVal =
6882 (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
6883
6884 // Make sure that all of the users of a non-constant load are from the
6885 // BUILD_VECTOR node.
6886 if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
6887 return SDValue();
6888
6889 unsigned ScalarSize = Ld.getValueSizeInBits();
6890 bool IsGE256 = (VT.getSizeInBits() >= 256);
6891
6892 // When optimizing for size, generate up to 5 extra bytes for a broadcast
6893 // instruction to save 8 or more bytes of constant pool data.
6894 // TODO: If multiple splats are generated to load the same constant,
6895 // it may be detrimental to overall size. There needs to be a way to detect
6896 // that condition to know if this is truly a size win.
6897 bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
6898
6899 // Handle broadcasting a single constant scalar from the constant pool
6900 // into a vector.
6901 // On Sandybridge (no AVX2), it is still better to load a constant vector
6902 // from the constant pool and not to broadcast it from a scalar.
6903 // But override that restriction when optimizing for size.
6904 // TODO: Check if splatting is recommended for other AVX-capable CPUs.
6905 if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
6906 EVT CVT = Ld.getValueType();
6907    assert(!CVT.isVector() && "Must not broadcast a vector type");
6908
6909 // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
6910 // For size optimization, also splat v2f64 and v2i64, and for size opt
6911 // with AVX2, also splat i8 and i16.
6912 // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
6913 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
6914 (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
6915 const Constant *C = nullptr;
6916 if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
6917 C = CI->getConstantIntValue();
6918 else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
6919 C = CF->getConstantFPValue();
6920
6921 assert(C && "Invalid constant type");
6922
6923 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6924 SDValue CP =
6925 DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
6926 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6927 Ld = DAG.getLoad(
6928 CVT, dl, DAG.getEntryNode(), CP,
6929 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6930 Alignment);
6931
6932 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6933 }
6934 }
6935
6936 bool IsLoad = ISD::isNormalLoad(Ld.getNode());
6937
6938 // Handle AVX2 in-register broadcasts.
6939 if (!IsLoad && Subtarget.hasInt256() &&
6940 (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
6941 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6942
6943 // The scalar source must be a normal load.
6944 if (!IsLoad)
6945 return SDValue();
6946
6947 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
6948 (Subtarget.hasVLX() && ScalarSize == 64))
6949 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6950
6951 // The integer check is needed for the 64-bit into 128-bit case, so it doesn't
6952 // match double, since there is no vbroadcastsd xmm instruction.
6953 if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) {
6954 if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
6955 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6956 }
6957
6958 // Unsupported broadcast.
6959 return SDValue();
6960}
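// A minimal stand-alone sketch in plain C++ (illustrative only, not LLVM code
// or part of X86ISelLowering.cpp) of the decision shape in the constant-splat
// path above: a 32- or 64-bit repeated pattern is loaded as a single scalar
// constant and broadcast (VBROADCAST); a pattern wider than 64 bits is loaded
// as a whole sub-vector from the constant pool and broadcast (SUBV_BROADCAST).
enum class SplatLowering { ScalarBroadcast, SubvectorBroadcast, None };

static SplatLowering classifySplat(unsigned SplatBitSize) {
  if (SplatBitSize == 32 || SplatBitSize == 64)
    return SplatLowering::ScalarBroadcast;    // one f32/f64 from the constant pool
  if (SplatBitSize > 64)
    return SplatLowering::SubvectorBroadcast; // e.g. a 128-bit pattern in a 256-bit vector
  return SplatLowering::None;                 // narrower patterns are handled earlier
}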
6961
6962/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
6963/// underlying vector and index.
6964///
6965/// Modifies \p ExtractedFromVec to the real vector and returns the real
6966/// index.
6967static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
6968 SDValue ExtIdx) {
6969 int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
6970 if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
6971 return Idx;
6972
6973 // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
6974 // lowered this:
6975 // (extract_vector_elt (v8f32 %vreg1), Constant<6>)
6976 // to:
6977 // (extract_vector_elt (vector_shuffle<2,u,u,u>
6978 // (extract_subvector (v8f32 %vreg0), Constant<4>),
6979 // undef)
6980 // Constant<0>)
6981 // In this case the vector is the extract_subvector expression and the index
6982 // is 2, as specified by the shuffle.
6983 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
6984 SDValue ShuffleVec = SVOp->getOperand(0);
6985 MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
6986 assert(ShuffleVecVT.getVectorElementType() ==
6987            ExtractedFromVec.getSimpleValueType().getVectorElementType());
6988
6989 int ShuffleIdx = SVOp->getMaskElt(Idx);
6990 if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
6991 ExtractedFromVec = ShuffleVec;
6992 return ShuffleIdx;
6993 }
6994 return Idx;
6995}
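// A minimal stand-alone sketch in plain C++ (illustrative only, not the
// SelectionDAG types used above) of the index remapping that
// getUnderlyingExtractedFromVec performs through one shuffle layer.  Mask
// models the shuffle mask; -1 stands for an undef mask element.
#include <vector>

static int remapThroughShuffle(const std::vector<int> &Mask, int Idx) {
  int ShuffleIdx = Mask[Idx];
  // Undef (-1) or an index into the shuffle's first source: redirect the
  // extract to that source, using the shuffle's mask element as the new index.
  if (ShuffleIdx < (int)Mask.size())
    return ShuffleIdx;
  return Idx; // element comes from the second shuffle source; keep the index
}
// Example from the comment above: extracting element 0 of
// vector_shuffle<2,u,u,u>(X, undef) is really extracting element 2 of X, and
// remapThroughShuffle({2, -1, -1, -1}, 0) == 2.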
6996
6997static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
6998 MVT VT = Op.getSimpleValueType();
6999
7000 // Skip if insert_vec_elt is not supported.
7001 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7002 if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
7003 return SDValue();
7004
7005 SDLoc DL(Op);
7006 unsigned NumElems = Op.getNumOperands();
7007
7008 SDValue VecIn1;
7009 SDValue VecIn2;
7010 SmallVector<unsigned, 4> InsertIndices;
7011 SmallVector<int, 8> Mask(NumElems, -1);
7012
7013 for (unsigned i = 0; i != NumElems; ++i) {
7014 unsigned Opc = Op.getOperand(i).getOpcode();
7015
7016 if (Opc == ISD::UNDEF)
7017 continue;
7018
7019 if (Opc != ISD::EXTRACT_VECTOR_ELT) {
7020 // Quit if more than 1 element needs inserting.
7021 if (InsertIndices.size() > 1)
7022 return SDValue();
7023
7024 InsertIndices.push_back(i);
7025 continue;
7026 }
7027
7028 SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
7029 SDValue ExtIdx = Op.getOperand(i).getOperand(1);
7030
7031 // Quit if non-constant index.
7032 if (!isa<ConstantSDNode>(ExtIdx))
7033 return SDValue();
7034 int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
7035
7036 // Quit if extracted from vector of different type.
7037 if (ExtractedFromVec.getValueType() != VT)
7038 return SDValue();
7039
7040 if (!VecIn1.getNode())
7041 VecIn1 = ExtractedFromVec;
7042 else if (VecIn1 != ExtractedFromVec) {
7043 if (!VecIn2.getNode())
7044 VecIn2 = ExtractedFromVec;
7045 else if (VecIn2 != ExtractedFromVec)
7046 // Quit if there are more than 2 vectors to shuffle.
7047 return SDValue();
7048 }
7049
7050 if (ExtractedFromVec == VecIn1)
7051 Mask[i] = Idx;
7052 else if (ExtractedFromVec == VecIn2)
7053 Mask[i] = Idx + NumElems;
7054 }
7055
7056 if (!VecIn1.getNode())
7057 return SDValue();
7058
7059 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
7060 SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
7061
7062 for (unsigned Idx : InsertIndices)
7063 NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
7064 DAG.getIntPtrConstant(Idx, DL));
7065
7066 return NV;
7067}
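// A minimal stand-alone sketch in plain C++ (illustrative names only, not LLVM
// API) of the bookkeeping buildFromShuffleMostly performs: elements extracted
// from at most two source vectors become shuffle-mask entries, and the few
// remaining elements are re-inserted afterwards with insert_vector_elt.
#include <optional>
#include <vector>

struct ShuffleThenInsertPlan {
  std::vector<int> Mask;               // -1 means "undef / filled by an insert"
  std::vector<unsigned> InsertIndices; // positions built by insert_vector_elt
};

// Elem models one BUILD_VECTOR operand: either an extract (SrcVec 0 or 1 plus a
// lane within that source) or something else that must be inserted (SrcVec == -1).
struct Elem { int SrcVec; int Lane; };

static std::optional<ShuffleThenInsertPlan>
planBuildVector(const std::vector<Elem> &Ops) {
  ShuffleThenInsertPlan Plan;
  Plan.Mask.assign(Ops.size(), -1);
  for (unsigned I = 0; I != Ops.size(); ++I) {
    if (Ops[I].SrcVec < 0) {
      // Mirror the check above: bail out once too many elements would need
      // separate insertion.
      if (Plan.InsertIndices.size() > 1)
        return std::nullopt;
      Plan.InsertIndices.push_back(I);
      continue;
    }
    // Lanes of the second source are offset by the element count, the same
    // convention DAG.getVectorShuffle uses for its two inputs.
    Plan.Mask[I] = Ops[I].Lane + (Ops[I].SrcVec == 1 ? (int)Ops.size() : 0);
  }
  return Plan;
}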
7068
7069static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
7070 assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
7071            Op.getScalarValueSizeInBits() == 1 &&
7072            "Can not convert non-constant vector");
7073 uint64_t Immediate = 0;
7074 for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
7075 SDValue In = Op.getOperand(idx);
7076 if (!In.isUndef())
7077 Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
7078 }
7079 SDLoc dl(Op);
7080 MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8));
7081 return DAG.getConstant(Immediate, dl, VT);
7082}
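// A minimal stand-alone sketch in plain C++ (illustrative only, not LLVM code)
// of the bit-packing ConvertI1VectorToInteger performs: lane i of the constant
// i1 build_vector becomes bit i of the immediate.  A plain array of values
// stands in for the operands; a negative entry models an undef lane.
#include <cstdint>
#include <vector>

static uint64_t packI1Lanes(const std::vector<int> &Lanes) {
  uint64_t Immediate = 0;
  for (unsigned Idx = 0; Idx != Lanes.size(); ++Idx)
    if (Lanes[Idx] >= 0)                       // skip undef lanes
      Immediate |= (uint64_t)(Lanes[Idx] & 1) << Idx;
  return Immediate;
}
// Example: packI1Lanes({1, 0, 1, 1}) == 0b1101 for a v4i1 constant.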
7083// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
7084SDValue
7085X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
7086
7087 MVT VT = Op.getSimpleValueType();
7088 assert((VT.getVectorElementType() == MVT::i1) &&
7089            "Unexpected type in LowerBUILD_VECTORvXi1!");
7090
7091 SDLoc dl(Op);
7092 if (ISD::isBuildVectorAllZeros(Op.getNode()))
7093 return DAG.getTargetConstant(0, dl, VT);
7094
7095 if (ISD::isBuildVectorAllOnes(Op.getNode()))
7096 return DAG.getTargetConstant(1, dl, VT);
7097
7098 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
7099 if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
7100 // Split the pieces.
7101 SDValue Lower =
7102 DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(0, 32));
7103 SDValue Upper =
7104 DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32));
7105 // We have to manually lower both halves so getNode doesn't try to
7106 // reassemble the build_vector.
7107 Lower = LowerBUILD_VECTORvXi1(Lower, DAG);
7108 Upper = LowerBUILD_VECTORvXi1(Upper, DAG);
7109 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper);
7110 }
7111 SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
7112 if (Imm.getValueSizeInBits() == VT.getSizeInBits())
7113 return DAG.getBitcast(VT, Imm);
7114 SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
7115 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
7116 DAG.getIntPtrConstant(0, dl));
7117 }
7118
7119 // The vector has one or more non-constant elements.
7120 uint64_t Immediate = 0;
7121 SmallVector<unsigned, 16> NonConstIdx;
7122 bool IsSplat = true;
7123 bool HasConstElts = false;
7124 int SplatIdx = -1;
7125 for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
7126 SDValue In = Op.getOperand(idx);
7127 if (In.isUndef())
7128 continue;
7129 if (!isa<ConstantSDNode>(In))
7130 NonConstIdx.push_back(idx);
7131 else {
7132 Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
7133 HasConstElts = true;
7134 }
7135 if (SplatIdx < 0)
7136 SplatIdx = idx;
7137 else if (In != Op.getOperand(SplatIdx))
7138 IsSplat = false;
7139 }
7140
7141 // For a splat, use "(select i1 splat_elt, all-ones, all-zeroes)".
7142 if (IsSplat)
7143 return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx),
7144 DAG.getConstant(1, dl, VT),
7145 DAG.getConstant(0, dl, VT));
7146
7147 // insert elements one by one
7148 SDValue DstVec;
7149 SDValue Imm;
7150 if (Immediate) {
7151 MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
7152 Imm = DAG.getConstant(Immediate, dl, ImmVT);
7153 }
7154 else if (HasConstElts)
7155 Imm = DAG.getConstant(0, dl, VT);
7156 else
7157 Imm = DAG.getUNDEF(VT);
7158 if (Imm.getValueSizeInBits() == VT.getSizeInBits())
7159 DstVec = DAG.getBitcast(VT, Imm);
7160 else {
7161 SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
7162 DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
7163 DAG.getIntPtrConstant(0, dl));
7164 }
7165
7166 for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
7167 unsigned InsertIdx = NonConstIdx[i];
7168 DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
7169 Op.getOperand(InsertIdx),
7170 DAG.getIntPtrConstant(InsertIdx, dl));
7171 }
7172 return DstVec;
7173}
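// A minimal stand-alone sketch in plain C++ (illustrative only, not LLVM code)
// of the splitting idea used above for v64i1 on a 32-bit subtarget, where a
// single 64-bit immediate cannot be materialised: the mask is handled as two
// 32-bit halves that are later concatenated.
#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> splitV64Mask(uint64_t Mask) {
  uint32_t Lower = (uint32_t)(Mask & 0xFFFFFFFFu); // lanes 0..31
  uint32_t Upper = (uint32_t)(Mask >> 32);         // lanes 32..63
  return {Lower, Upper};
}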
7174
7175/// \brief Return true if \p N implements a horizontal binop and return the
7176/// operands for the horizontal binop into V0 and V1.
7177///
7178/// This is a helper function of LowerToHorizontalOp().
7179 /// This function checks whether the input build_vector \p N implements a
7180/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
7181/// operation to match.
7182/// For example, if \p Opcode is equal to ISD::ADD, then this function
7183/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
7184/// is equal to ISD::SUB, then this function checks if this is a horizontal
7185/// arithmetic sub.
7186///
7187/// This function only analyzes elements of \p N whose indices are
7188/// in range [BaseIdx, LastIdx).
7189static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
7190 SelectionDAG &DAG,
7191 unsigned BaseIdx, unsigned LastIdx,
7192 SDValue &V0, SDValue &V1) {
7193 EVT VT = N->getValueType(0);
7194
7195 assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
7196 assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
7197            "Invalid Vector in input!");
7198
7199 bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
7200 bool CanFold = true;
7201 unsigned ExpectedVExtractIdx = BaseIdx;
7202 unsigned NumElts = LastIdx - BaseIdx;
7203 V0 = DAG.getUNDEF(VT);
7204 V1 = DAG.getUNDEF(VT);
7205
7206 // Check if N implements a horizontal binop.
7207 for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
7208 SDValue Op = N->getOperand(i + BaseIdx);
7209
7210 // Skip UNDEFs.
7211 if (Op->isUndef()) {
7212 // Update the expected vector extract index.
7213 if (i * 2 == NumElts)
7214 ExpectedVExtractIdx = BaseIdx;
7215 ExpectedVExtractIdx += 2;
7216 continue;
7217 }
7218
7219 CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
7220
7221 if (!CanFold)
7222 break;
7223
7224 SDValue Op0 = Op.getOperand(0);
7225 SDValue Op1 = Op.getOperand(1);
7226
7227 // Try to match the following pattern:
7228 // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
7229 CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7230 Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7231 Op0.getOperand(0) == Op1.getOperand(0) &&
7232 isa<ConstantSDNode>(Op0.getOperand(1)) &&
7233 isa<ConstantSDNode>(Op1.getOperand(1)));
7234 if (!CanFold)
7235 break;
7236
7237 unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
7238 unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
7239
7240 if (i * 2 < NumElts) {
7241 if (V0.isUndef()) {
7242 V0 = Op0.getOperand(0);
7243 if (V0.getValueType() != VT)
7244 return false;
7245 }
7246 } else {
7247 if (V1.isUndef()) {
7248 V1 = Op0.getOperand(0);
7249 if (V1.getValueType() != VT)
7250 return false;
7251 }
7252 if (i * 2 == NumElts)
7253 ExpectedVExtractIdx = BaseIdx;
7254 }
7255
7256 SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
7257 if (I0 == ExpectedVExtractIdx)
7258 CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
7259 else if (IsCommutable && I1 == ExpectedVExtractIdx) {
7260 // Try to match the following dag sequence:
7261 // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
7262 CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
7263 } else
7264 CanFold = false;
7265
7266 ExpectedVExtractIdx += 2;
7267 }
7268
7269 return CanFold;
7270}
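// A minimal stand-alone sketch in plain C++ (illustrative only, not LLVM code)
// of the scalar meaning of the pattern matched by isHorizontalBinOp for an
// add: in the 128-bit form, a horizontal add pairs adjacent elements of each
// input, low half from V0 and high half from V1.
#include <vector>

static std::vector<int> hadd(const std::vector<int> &V0,
                             const std::vector<int> &V1) {
  std::vector<int> R;
  for (unsigned I = 0; I + 1 < V0.size(); I += 2)
    R.push_back(V0[I] + V0[I + 1]); // low half: adjacent pairs of V0
  for (unsigned I = 0; I + 1 < V1.size(); I += 2)
    R.push_back(V1[I] + V1[I + 1]); // high half: adjacent pairs of V1
  return R;
}
// Example: hadd({1, 2, 3, 4}, {10, 20, 30, 40}) == {3, 7, 30, 70}, which is the
// build_vector of (add (extract A, I), (extract A, I+1)) nodes matched above.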
7271
7272/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
7273/// a concat_vector.
7274///
7275/// This is a helper function of LowerToHorizontalOp().
7276/// This function expects two 256-bit vectors called V0 and V1.
7277/// At first, each vector is split into two separate 128-bit vectors.
7278/// Then, the resulting 128-bit vectors are used to implement two
7279/// horizontal binary operations.
7280///
7281/// The kind of horizontal binary operation is defined by \p X86Opcode.
7282///
7283 /// \p Mode specifies how the 128-bit parts of V0 and V1 are passed as input to
7284 /// the two new horizontal binops.
7285/// When Mode is set, the first horizontal binop dag node would take as input
7286/// the lower 128-bit of V0 and the upper 128-bit of V0. The second
7287/// horizontal binop dag node would take as input the lower 128-bit of V1
7288/// and the upper 128-bit of V1.
7289/// Example:
7290/// HADD V0_LO, V0_HI
7291/// HADD V1_LO, V1_HI
7292///
7293/// Otherwise, the first horizontal binop dag node takes as input the lower
7294/// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
7295/// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
7296/// Example:
7297/// HADD V0_LO, V1_LO
7298/// HADD V0_HI, V1_HI
7299///
7300/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
7301/// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
7302/// the upper 128-bits of the result.
7303static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
7304 const SDLoc &DL, SelectionDAG &DAG,
7305 unsigned X86Opcode, bool Mode,
7306 bool isUndefLO, bool isUndefHI) {
7307 MVT VT = V0.getSimpleValueType();
7308 assert(VT.is256BitVector() && VT == V1.getSimpleValueType() &&
7309            "Invalid nodes in input!");
7310
7311 unsigned NumElts = VT.getVectorNumElements();
7312 SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
7313 SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
7314 SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
7315 SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
7316 MVT NewVT = V0_LO.getSimpleValueType();
7317
7318 SDValue LO = DAG.getUNDEF(NewVT);
7319 SDValue HI = DAG.getUNDEF(NewVT);
7320
7321 if (Mode) {
7322 // Don't emit a horizontal binop if the result is expected to be UNDEF.
7323 if (!isUndefLO && !V0->isUndef())
7324 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
7325 if (!isUndefHI && !V1->isUndef())
7326 HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
7327 } else {
7328 // Don't emit a horizontal binop if the result is expected to be UNDEF.
7329 if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef()))
7330 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
7331
7332 if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef()))
7333 HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
7334 }
7335
7336 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
7337}
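// A minimal stand-alone sketch in plain C++ (illustrative names only, not LLVM
// code) of which 128-bit halves feed the two new horizontal ops for the two
// layouts described above; the names follow the variables in
// ExpandHorizontalBinOp.
#include <utility>
#include <vector>

static std::vector<std::pair<const char *, const char *>>
horizontalOperandLayout(bool Mode) {
  if (Mode)
    return {{"V0_LO", "V0_HI"},  // first op works entirely on V0
            {"V1_LO", "V1_HI"}}; // second op works entirely on V1
  return {{"V0_LO", "V1_LO"},    // first op combines the two low halves
          {"V0_HI", "V1_HI"}};   // second op combines the two high halves
}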
7338
7339/// Returns true iff \p BV builds a vector with the result equivalent to
7340 /// the result of an ADDSUB operation.
7341/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation
7342/// are written to the parameters \p Opnd0 and \p Opnd1.
7343static bool isAddSub(const BuildVectorSDNode *BV,
7344 const X86Subtarget &Subtarget, SelectionDAG &DAG,
7345 SDValue &Opnd0, SDValue &Opnd1) {
7346
7347 MVT VT = BV->getSimpleValueType(0);
7348 if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
7349 (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
7350 (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
7351 return false;
7352
7353 unsigned NumElts = VT.getVectorNumElements();
7354 SDValue InVec0 = DAG.getUNDEF(VT);
7355 SDValue InVec1 = DAG.getUNDEF(VT);
7356
7357 // Odd-numbered elements in the input build vector are obtained from
7358 // adding two integer/float elements.
7359 // Even-numbered elements in the input build vector are obtained from
7360 // subtracting two integer/float elements.
7361 unsigned ExpectedOpcode = ISD::FSUB;
7362 unsigned NextExpectedOpcode = ISD::FADD;
7363 bool AddFound = false;
7364 bool SubFound = false;
7365
7366 for (unsigned i = 0, e = NumElts; i != e; ++i) {
7367 SDValue Op = BV->getOperand(i);
7368
7369 // Skip 'undef' values.
7370 unsigned Opcode = Op.getOpcode();
7371 if (Opcode == ISD::UNDEF) {
7372 std::swap(ExpectedOpcode, NextExpectedOpcode);
7373 continue;
7374 }
7375
7376 // Early exit if we found an unexpected opcode.
7377 if (Opcode != ExpectedOpcode)
7378 return false;
7379
7380 SDValue Op0 = Op.getOperand(0);
7381 SDValue Op1 = Op.getOperand(1);
7382
7383 // Try to match the following pattern:
7384 // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
7385 // Early exit if we cannot match that sequence.
7386 if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7387 Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7388 !isa<ConstantSDNode>(Op0.getOperand(1)) ||
7389 !isa<ConstantSDNode>(Op1.getOperand(1)) ||
7390 Op0.getOperand(1) != Op1.getOperand(1))
7391 return false;
7392
7393 unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
7394 if (I0 != i)
7395 return false;
7396
7397 // We found a valid add/sub node. Update the information accordingly.
7398 if (i & 1)
7399 AddFound = true;
7400 else
7401 SubFound = true;
7402
7403 // Update InVec0 and InVec1.
7404 if (InVec0.isUndef()) {
7405 InVec0 = Op0.getOperand(0);
7406 if (InVec0.getSimpleValueType() != VT)
7407 return false;
7408 }
7409 if (InVec1.isUndef()) {
7410 InVec1 = Op1.getOperand(0);
7411 if (InVec1.getSimpleValueType() != VT)
7412 return false;
7413 }
7414
7415 // Make sure that the operands of each add/sub node always
7416 // come from the same pair of vectors.
7417 if (InVec0 != Op0.getOperand(0)) {
7418 if (ExpectedOpcode == ISD::FSUB)
7419 return false;
7420
7421 // FADD is commutable. Try to commute the operands
7422 // and then test again.
7423 std::swap(Op0, Op1);
7424 if (InVec0 != Op0.getOperand(0))
7425 return false;
7426 }
7427
7428 if (InVec1 != Op1.getOperand(0))
7429 return false;
7430
7431 // Update the pair of expected opcodes.
7432 std::swap(ExpectedOpcode, NextExpectedOpcode);
7433 }
7434
7435 // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
7436 if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
7437 return false;
7438
7439 Opnd0 = InVec0;
7440 Opnd1 = InVec1;
7441 return true;
7442}
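// A minimal stand-alone sketch in plain C++ (illustrative only, not LLVM code)
// of the lane pattern isAddSub recognises: ADDSUB subtracts in the even lanes
// and adds in the odd lanes, which is why the loop above expects FSUB at even
// indices and FADD at odd ones.
#include <vector>

static std::vector<double> addsub(const std::vector<double> &A,
                                  const std::vector<double> &B) {
  std::vector<double> R(A.size());
  for (unsigned I = 0; I != A.size(); ++I)
    R[I] = (I & 1) ? A[I] + B[I]  // odd lane: add
                   : A[I] - B[I]; // even lane: subtract
  return R;
}
// Example: addsub({1, 2, 3, 4}, {10, 10, 10, 10}) == {-9, 12, -7, 14}.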
7443
7444 /// Returns true if it is possible to fold MUL and an idiom that has already been
7445/// recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
7446/// If (and only if) true is returned, the operands of FMADDSUB are written to
7447/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
7448///
7449/// Prior to calling this function it should be known that there is some
7450/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
7451/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
7452/// before replacement of such SDNode with ADDSUB operation. Thus the number
7453/// of \p Opnd0 uses is expected to be equal to 2.
7454/// For example, this function may be called for the following IR:
7455/// %AB = fmul fast <2 x double> %A, %B
7456/// %Sub = fsub fast <2 x double> %AB, %C
7457/// %Add = fadd fast <2 x double> %AB, %C
7458/// %Addsub =